sunnymh-scraper/scraper.js
2025-03-27 17:33:38 +08:00

82 lines
2.4 KiB
JavaScript

const puppeteer = require('puppeteer');
exports.BaseScraper = class BaseScraper {
constructor() {
this.browser = null;
this.pages = [];
this.pages_response = [];
}
async init() {
this.browser = await puppeteer.launch({
// headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
}
async openPage(url) {
const page = await this.browser.newPage();
page.on('response', async (response) => {
const pageIndex = this.pages.indexOf(page);
if (response.url().includes('https://res.colamanga.com')) {
this.pages_response[pageIndex] = await response.buffer();
}
else if (/blob:https:\/\/www\.colamanga\.com\//.test(response.url())) {
if (!this.pages_response[pageIndex]) {
this.pages_response[pageIndex] = {};
}
this.pages_response[pageIndex][response.url()] = await response.buffer();
}
});
await page.goto(url, { waitUntil: 'domcontentloaded' });
this.pages.push(page);
return page;
}
async closePage(page) {
const pageIndex = this.pages.indexOf(page);
if (pageIndex !== -1) {
this.pages.splice(pageIndex, 1);
this.pages_response.splice(pageIndex, 1);
}
await page.close();
}
async closeAllPages() {
await Promise.all(this.pages.map(page => page.close()));
this.pages = [];
this.pages_response = [];
await this.browser.close();
}
async loadPage(url) {
const page = await this.openPage(url);
return page;
}
async scrollPage(page) {
await page.evaluate(async () => {
window.scrollTo(0, 0);
await new Promise((resolve) => {
const distance = 100; // distance to scroll
const delay = 100; // delay between scrolls
const scrollInterval = setInterval(() => {
window.scrollBy(0, distance);
if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight) {
clearInterval(scrollInterval);
resolve();
}
}, delay);
});
});
}
}