82 lines
2.4 KiB
JavaScript
82 lines
2.4 KiB
JavaScript
const puppeteer = require('puppeteer');
|
|
|
|
exports.BaseScraper = class BaseScraper {
|
|
constructor() {
|
|
this.browser = null;
|
|
this.pages = [];
|
|
this.pages_response = [];
|
|
}
|
|
|
|
async init() {
|
|
this.browser = await puppeteer.launch({
|
|
// headless: false,
|
|
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
|
});
|
|
}
|
|
|
|
async openPage(url) {
|
|
const page = await this.browser.newPage();
|
|
|
|
page.on('response', async (response) => {
|
|
const pageIndex = this.pages.indexOf(page);
|
|
|
|
if (response.url().includes('https://res.colamanga.com')) {
|
|
this.pages_response[pageIndex] = await response.buffer();
|
|
}
|
|
else if (/blob:https:\/\/www\.colamanga\.com\//.test(response.url())) {
|
|
if (!this.pages_response[pageIndex]) {
|
|
this.pages_response[pageIndex] = {};
|
|
}
|
|
this.pages_response[pageIndex][response.url()] = await response.buffer();
|
|
}
|
|
|
|
});
|
|
|
|
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
this.pages.push(page);
|
|
return page;
|
|
}
|
|
|
|
|
|
async closePage(page) {
|
|
const pageIndex = this.pages.indexOf(page);
|
|
if (pageIndex !== -1) {
|
|
this.pages.splice(pageIndex, 1);
|
|
this.pages_response.splice(pageIndex, 1);
|
|
}
|
|
await page.close();
|
|
}
|
|
|
|
async closeAllPages() {
|
|
await Promise.all(this.pages.map(page => page.close()));
|
|
this.pages = [];
|
|
this.pages_response = [];
|
|
await this.browser.close();
|
|
}
|
|
|
|
async loadPage(url) {
|
|
const page = await this.openPage(url);
|
|
return page;
|
|
}
|
|
|
|
async scrollPage(page) {
|
|
await page.evaluate(async () => {
|
|
window.scrollTo(0, 0);
|
|
await new Promise((resolve) => {
|
|
const distance = 100; // distance to scroll
|
|
const delay = 100; // delay between scrolls
|
|
const scrollInterval = setInterval(() => {
|
|
window.scrollBy(0, distance);
|
|
if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight) {
|
|
clearInterval(scrollInterval);
|
|
resolve();
|
|
}
|
|
}, delay);
|
|
});
|
|
});
|
|
}
|
|
|
|
}
|
|
|
|
|