Updated scraper for new server
This commit is contained in:
parent
a3bf75bd36
commit
66b78bca1d
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,2 +1,3 @@
|
|||||||
/node_modules
|
/node_modules
|
||||||
/test
|
/test
|
||||||
|
.env
|
||||||
@ -1,131 +0,0 @@
|
|||||||
const { BaseScraper } = require('./scraper.js');
|
|
||||||
const sharp = require('sharp');
|
|
||||||
const fs = require('fs');
|
|
||||||
const path = require('path');
|
|
||||||
|
|
||||||
/**
 * Scraper for colamanga.com pages, built on the browser/page helpers
 * inherited from BaseScraper (loadPage, closePage, scrollPage, pages,
 * pages_response).
 */
class ColaMangaScraper extends BaseScraper {
    constructor() {
        super();
    }

    /**
     * Encode an image as WebP (quality 80) and write it to `dir/filename`.
     * The target directory is created when missing. Failures are logged and
     * swallowed on purpose so one bad image does not abort a whole chapter.
     *
     * @param {Buffer|ArrayBuffer|Uint8Array} buffer - Encoded source image bytes.
     * @param {string} filename - Output file name, including extension.
     * @param {string} [dir='.'] - Output directory.
     * @returns {Promise<void>}
     */
    async saveBufferAsWebp(buffer, filename, dir = '.') {
        const dirPath = path.resolve(dir);
        const filePath = path.join(dirPath, filename);

        try {
            await fs.promises.mkdir(dirPath, { recursive: true });
            // `.webp()` already selects the output format; no extra toFormat() needed.
            await sharp(buffer).webp({ quality: 80 }).toFile(filePath);
        } catch (error) {
            console.error(`Failed to save ${filename}:`, error);
        }
    }

    /**
     * Alias of {@link ColaMangaScraper#saveBufferAsWebp}. Existing callers use
     * this spelling, which previously did not exist and raised a TypeError.
     *
     * @param {Buffer|ArrayBuffer|Uint8Array} buffer - Encoded source image bytes.
     * @param {string} filename - Output file name, including extension.
     * @param {string} [dir='.'] - Output directory.
     * @returns {Promise<void>}
     */
    async saveBufferToWebp(buffer, filename, dir = '.') {
        return this.saveBufferAsWebp(buffer, filename, dir);
    }

    /**
     * Scrape the detail page of a manga.
     *
     * @param {string} mangaUrl - URL of the manga detail page.
     * @param {number} [coverTimeoutMs=30000] - Maximum time to wait for the
     *     captured cover response before giving up.
     * @returns {Promise<object>} Object with `name`, `author`, `nickNames`,
     *     `genres`, `status`, `chapters` (`{ name, url }` entries) and
     *     `coverPic` (captured response body, or `null` if none arrived in time).
     */
    async getMangaInfo(mangaUrl, coverTimeoutMs = 30000) {
        const page = await this.loadPage(mangaUrl);

        try {
            await page.waitForSelector('.fed-deta-info', { visible: true });

            const mangaInfo = {
                name: await page.$eval('.fed-deta-content h1', el => el.textContent),
                author: '',
                nickNames: [],
                genres: [],
                status: '',
                chapters: []
            };

            // Each <li> carries a label in its <span>; the values are its <a> children.
            const elements = await page.$$('.fed-deta-content li');
            for (const el of elements) {
                const label = await el.$eval('span', node => node.textContent.trim());
                if (label === '状态') {
                    mangaInfo.status = await el.$eval('a', node => node.textContent);
                } else if (label === '作者') {
                    mangaInfo.author = await el.$eval('a', node => node.textContent);
                } else if (label === '别名') {
                    mangaInfo.nickNames = await el.$$eval('a', nodes => nodes.map(node => node.textContent));
                } else if (label === '类别') {
                    mangaInfo.genres = await el.$$eval('a', nodes => nodes.map(node => node.textContent));
                }
            }

            // One round trip for the whole list instead of two per chapter.
            // Items without a link are skipped rather than failing the scrape.
            const rawChapters = await page.$$eval('.all_data_list li', items => items
                .map(item => item.querySelector('a'))
                .filter(anchor => anchor !== null)
                .map(anchor => ({
                    name: anchor.textContent,
                    url: anchor.getAttribute('href')
                })));

            // The href attribute may be relative; resolve it against the page
            // URL so it can be handed to loadPage() later. Absolute hrefs are
            // returned unchanged apart from URL normalisation.
            const baseUrl = page.url();
            mangaInfo.chapters = rawChapters.map(({ name, url }) => ({
                name,
                url: url ? new URL(url, baseUrl).href : url
            }));

            // Poll for the response captured by the page's response listener,
            // but never forever: a missing cover must not hang the scrape.
            const deadline = Date.now() + coverTimeoutMs;
            let cover = this.pages_response[this.pages.indexOf(page)];
            while (!cover && Date.now() < deadline) {
                await new Promise(resolve => setTimeout(resolve, 100));
                cover = this.pages_response[this.pages.indexOf(page)];
            }
            if (!cover) {
                console.warn(`No cover captured for ${mangaUrl} within ${coverTimeoutMs} ms`);
            }
            mangaInfo.coverPic = cover || null;

            return mangaInfo;
        } finally {
            // Always release the tab, even when a selector or evaluation fails.
            await this.closePage(page);
        }
    }

    /**
     * Download every captured image of one chapter into `chapterDir`.
     * A chapter is skipped when its target already exists and is not an
     * empty directory (an empty directory is what a previously failed run
     * leaves behind, so it is retried).
     *
     * @param {{name: string, url: string}} chapter - Chapter descriptor.
     * @param {string} [chapterDir='.'] - Output directory for the images.
     * @returns {Promise<void>}
     */
    async downloadChapterPics(chapter, chapterDir = ".") {
        const directoryPath = path.resolve(chapterDir);
        if (fs.existsSync(directoryPath)) {
            const isDirectory = fs.statSync(directoryPath).isDirectory();
            if (!isDirectory || fs.readdirSync(directoryPath).length > 0) {
                console.log(`Skipping ${chapter.name} as it already exists`);
                return;
            }
        }
        fs.mkdirSync(directoryPath, { recursive: true });

        const page = await this.loadPage(chapter.url);
        try {
            await page.waitForSelector('.mh_mangalist', { visible: true });

            // Scroll repeatedly until no visible loading placeholder remains
            // and at least one image response has been captured.
            for (let attempt = 0; attempt < 10; attempt++) {
                console.log(`Downloading ${chapter.name}, attempt ${attempt + 1}`);
                await this.scrollPage(page);
                const loadingElements = await page.$$eval('.mh_loading:not([style*="display: none"])', elements => elements.length);
                await new Promise(resolve => setTimeout(resolve, 1000));

                // Nothing may have been captured yet, so guard against undefined.
                const captured = this.pages_response[this.pages.indexOf(page)];
                if (loadingElements === 0 && captured && Object.keys(captured).length !== 0) {
                    break;
                }
            }

            const responses = this.pages_response[this.pages.indexOf(page)] || {};
            for (const [url, response] of Object.entries(responses)) {
                // getImgOrder returns a list; only the first match names the file.
                const [order] = await this.getImgOrder(page, url);
                if (order === undefined) {
                    console.warn(`No page order found for ${url} in ${chapter.name}; skipping`);
                    continue;
                }
                // The captured body is passed to the encoder as-is.
                await this.saveBufferAsWebp(response, `${order}.webp`, chapterDir);
            }
        } finally {
            await this.closePage(page);
        }
    }

    /**
     * Download a list of chapters one after another, each into
     * `dir/<chapter.name>`.
     *
     * @param {Array<{name: string, url: string}>} chapters - Chapters to fetch.
     * @param {string} [dir='.'] - Parent output directory.
     * @returns {Promise<void>}
     */
    async downloadChapter(chapters, dir = ".") {
        // mkdir with `recursive` is a no-op when the directory already exists.
        fs.mkdirSync(path.resolve(dir), { recursive: true });

        for (const chapter of chapters) {
            await this.downloadChapterPics(chapter, path.join(dir, chapter.name));
        }
    }

    /**
     * Look up the `p` attribute of every `.mh_comicpic` element that contains
     * an `<img>` whose `src` attribute equals `src`, left-padded with zeros to
     * the digit count of the total number of `.mh_comicpic` elements.
     *
     * @param {object} page - Page handle returned by loadPage().
     * @param {string} src - Image `src` attribute value to search for.
     * @returns {Promise<string[]>} Matching padded values (empty when none match).
     */
    async getImgOrder(page, src) {
        return page.$$eval('.mh_comicpic', (elements, wantedSrc) => {
            const width = elements.length.toString().length;
            return elements
                .filter(el => el.querySelector(`img[src="${wantedSrc}"]`))
                .map(el => el.getAttribute('p'))
                // A container without a `p` attribute cannot name a file.
                .filter(order => order !== null)
                .map(order => order.padStart(width, '0'));
        }, src);
    }
}
|
|
||||||
|
|
||||||
// Manual smoke run: scrape one manga, save its cover and download all
// chapters into ./test.
(async () => {
    const scraper = new ColaMangaScraper();
    await scraper.init();

    try {
        const mangaUrl = 'https://www.colamanga.com/manga-od825111/';
        const mangaInfo = await scraper.getMangaInfo(mangaUrl);
        // The class defines saveBufferAsWebp; the previous spelling
        // (saveBufferToWebp) was not a method and threw a TypeError.
        await scraper.saveBufferAsWebp(mangaInfo.coverPic, 'cover.webp', 'test');
        await scraper.downloadChapter(mangaInfo.chapters, 'test');
        console.log(mangaInfo);
    } finally {
        // Shut the browser down even when scraping fails, so the process can exit.
        await scraper.closeAllPages();
    }
})().catch((error) => {
    console.error('Scrape failed:', error);
    process.exitCode = 1;
});
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
8
index.js
8
index.js
@ -1,8 +0,0 @@
|
|||||||
const axios = require('axios');
|
|
||||||
|
|
||||||
// Placeholder payload for manually exercising the local insert endpoint.
const postData = {
    mangaId: 'yourMangaId',
    mangaName: 'yourMangaName'
};

// Handle rejections explicitly: an unreachable server or a non-2xx reply
// would otherwise surface as an unhandled promise rejection.
axios.post('http://localhost:4000/insert', postData)
    .catch((error) => {
        console.error('Insert request failed:', error.message);
        process.exitCode = 1;
    });
|
|
||||||
231
package-lock.json
generated
231
package-lock.json
generated
@ -1,18 +1,19 @@
|
|||||||
{
|
{
|
||||||
"name": "sunnymh-scrap",
|
"name": "sunnymh-scraper",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"lockfileVersion": 3,
|
"lockfileVersion": 3,
|
||||||
"requires": true,
|
"requires": true,
|
||||||
"packages": {
|
"packages": {
|
||||||
"": {
|
"": {
|
||||||
"name": "sunnymh-scrap",
|
"name": "sunnymh-scraper",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.8.4",
|
"axios": "^1.6.7",
|
||||||
"fs": "^0.0.1-security",
|
"dotenv": "^16.5.0",
|
||||||
"puppeteer": "^24.4.0",
|
"puppeteer": "^22.0.0",
|
||||||
"sharp": "^0.33.5"
|
"sharp": "^0.33.2",
|
||||||
|
"uuid": "^9.0.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@babel/code-frame": {
|
"node_modules/@babel/code-frame": {
|
||||||
@ -410,17 +411,18 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@puppeteer/browsers": {
|
"node_modules/@puppeteer/browsers": {
|
||||||
"version": "2.8.0",
|
"version": "2.3.0",
|
||||||
"resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.8.0.tgz",
|
"resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.3.0.tgz",
|
||||||
"integrity": "sha512-yTwt2KWRmCQAfhvbCRjebaSX8pV1//I0Y3g+A7f/eS7gf0l4eRJoUCvcYdVtboeU4CTOZQuqYbZNS8aBYb8ROQ==",
|
"integrity": "sha512-ioXoq9gPxkss4MYhD+SFaU9p1IHFUX0ILAWFPyjGaBdjLsYAlZw6j1iLA0N/m12uVHLFDfSYNF7EQccjinIMDA==",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"debug": "^4.4.0",
|
"debug": "^4.3.5",
|
||||||
"extract-zip": "^2.0.1",
|
"extract-zip": "^2.0.1",
|
||||||
"progress": "^2.0.3",
|
"progress": "^2.0.3",
|
||||||
"proxy-agent": "^6.5.0",
|
"proxy-agent": "^6.4.0",
|
||||||
"semver": "^7.7.1",
|
"semver": "^7.6.3",
|
||||||
"tar-fs": "^3.0.8",
|
"tar-fs": "^3.0.6",
|
||||||
|
"unbzip2-stream": "^1.4.3",
|
||||||
"yargs": "^17.7.2"
|
"yargs": "^17.7.2"
|
||||||
},
|
},
|
||||||
"bin": {
|
"bin": {
|
||||||
@ -437,13 +439,13 @@
|
|||||||
"license": "MIT"
|
"license": "MIT"
|
||||||
},
|
},
|
||||||
"node_modules/@types/node": {
|
"node_modules/@types/node": {
|
||||||
"version": "22.13.13",
|
"version": "22.15.15",
|
||||||
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.13.tgz",
|
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.15.15.tgz",
|
||||||
"integrity": "sha512-ClsL5nMwKaBRwPcCvH8E7+nU4GxHVx1axNvMZTFHMEfNI7oahimt26P5zjVCRrjiIWj6YFXfE1v3dEp94wLcGQ==",
|
"integrity": "sha512-R5muMcZob3/Jjchn5LcO8jdKwSCbzqmPB6ruBxMcf9kbxtniZHP327s6C37iOfuw8mbKK3cAQa7sEl7afLrQ8A==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"undici-types": "~6.20.0"
|
"undici-types": "~6.21.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/@types/yauzl": {
|
"node_modules/@types/yauzl": {
|
||||||
@ -538,9 +540,9 @@
|
|||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
"node_modules/bare-fs": {
|
"node_modules/bare-fs": {
|
||||||
"version": "4.0.2",
|
"version": "4.1.4",
|
||||||
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.0.2.tgz",
|
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.1.4.tgz",
|
||||||
"integrity": "sha512-S5mmkMesiduMqnz51Bfh0Et9EX0aTCJxhsI4bvzFFLs8Z1AV8RDHadfY5CyLwdoLHgXbNBEN1gQcbEtGwuvixw==",
|
"integrity": "sha512-r8+26Voz8dGX3AYpJdFb1ZPaUSM8XOLCZvy+YGpRTmwPHIxA7Z3Jov/oMPtV7hfRQbOnH8qGlLTzQAbgtdNN0Q==",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"optional": true,
|
"optional": true,
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
@ -602,6 +604,26 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/base64-js": {
|
||||||
|
"version": "1.5.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
|
||||||
|
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/basic-ftp": {
|
"node_modules/basic-ftp": {
|
||||||
"version": "5.0.5",
|
"version": "5.0.5",
|
||||||
"resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz",
|
"resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz",
|
||||||
@ -611,6 +633,30 @@
|
|||||||
"node": ">=10.0.0"
|
"node": ">=10.0.0"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/buffer": {
|
||||||
|
"version": "5.7.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
|
||||||
|
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"base64-js": "^1.3.1",
|
||||||
|
"ieee754": "^1.1.13"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/buffer-crc32": {
|
"node_modules/buffer-crc32": {
|
||||||
"version": "0.2.13",
|
"version": "0.2.13",
|
||||||
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
|
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
|
||||||
@ -643,13 +689,14 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/chromium-bidi": {
|
"node_modules/chromium-bidi": {
|
||||||
"version": "2.1.2",
|
"version": "0.6.3",
|
||||||
"resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-2.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.6.3.tgz",
|
||||||
"integrity": "sha512-vtRWBK2uImo5/W2oG6/cDkkHSm+2t6VHgnj+Rcwhb0pP74OoUb4GipyRX/T/y39gYQPhioP0DPShn+A7P6CHNw==",
|
"integrity": "sha512-qXlsCmpCZJAnoTYI83Iu6EdYQpMYdVkCfq08KDh2pmlVqK5t5IA9mGs4/LwCwp4fqisSOMXZxP3HIh8w8aRn0A==",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"mitt": "^3.0.1",
|
"mitt": "3.0.1",
|
||||||
"zod": "^3.24.1"
|
"urlpattern-polyfill": "10.0.0",
|
||||||
|
"zod": "3.23.8"
|
||||||
},
|
},
|
||||||
"peerDependencies": {
|
"peerDependencies": {
|
||||||
"devtools-protocol": "*"
|
"devtools-protocol": "*"
|
||||||
@ -807,11 +854,23 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/devtools-protocol": {
|
"node_modules/devtools-protocol": {
|
||||||
"version": "0.0.1413902",
|
"version": "0.0.1312386",
|
||||||
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1413902.tgz",
|
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1312386.tgz",
|
||||||
"integrity": "sha512-yRtvFD8Oyk7C9Os3GmnFZLu53yAfsnyw1s+mLmHHUK0GQEc9zthHWvS1r67Zqzm5t7v56PILHIVZ7kmFMaL2yQ==",
|
"integrity": "sha512-DPnhUXvmvKT2dFA/j7B+riVLUt9Q6RKJlcppojL5CoRywJJKLDYnRlw0gTFKfgDPHP5E04UoB71SxoJlVZy8FA==",
|
||||||
"license": "BSD-3-Clause"
|
"license": "BSD-3-Clause"
|
||||||
},
|
},
|
||||||
|
"node_modules/dotenv": {
|
||||||
|
"version": "16.5.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz",
|
||||||
|
"integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==",
|
||||||
|
"license": "BSD-2-Clause",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=12"
|
||||||
|
},
|
||||||
|
"funding": {
|
||||||
|
"url": "https://dotenvx.com"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/dunder-proto": {
|
"node_modules/dunder-proto": {
|
||||||
"version": "1.0.1",
|
"version": "1.0.1",
|
||||||
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
|
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
|
||||||
@ -1035,12 +1094,6 @@
|
|||||||
"node": ">= 6"
|
"node": ">= 6"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/fs": {
|
|
||||||
"version": "0.0.1-security",
|
|
||||||
"resolved": "https://registry.npmjs.org/fs/-/fs-0.0.1-security.tgz",
|
|
||||||
"integrity": "sha512-3XY9e1pP0CVEUCdj5BmfIZxRBTSDycnbqhIOGec9QYtmVH2fbLpj86CFWkrNOkt/Fvty4KZG5lTglL9j/gJ87w==",
|
|
||||||
"license": "ISC"
|
|
||||||
},
|
|
||||||
"node_modules/function-bind": {
|
"node_modules/function-bind": {
|
||||||
"version": "1.1.2",
|
"version": "1.1.2",
|
||||||
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
|
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
|
||||||
@ -1202,6 +1255,26 @@
|
|||||||
"node": ">= 14"
|
"node": ">= 14"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/ieee754": {
|
||||||
|
"version": "1.2.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
|
||||||
|
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
|
||||||
|
"funding": [
|
||||||
|
{
|
||||||
|
"type": "github",
|
||||||
|
"url": "https://github.com/sponsors/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "patreon",
|
||||||
|
"url": "https://www.patreon.com/feross"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"type": "consulting",
|
||||||
|
"url": "https://feross.org/support"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"license": "BSD-3-Clause"
|
||||||
|
},
|
||||||
"node_modules/import-fresh": {
|
"node_modules/import-fresh": {
|
||||||
"version": "3.3.1",
|
"version": "3.3.1",
|
||||||
"resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz",
|
"resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz",
|
||||||
@ -1470,38 +1543,35 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/puppeteer": {
|
"node_modules/puppeteer": {
|
||||||
"version": "24.4.0",
|
"version": "22.15.0",
|
||||||
"resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-24.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.15.0.tgz",
|
||||||
"integrity": "sha512-E4JhJzjS8AAI+6N/b+Utwarhz6zWl3+MR725fal+s3UlOlX2eWdsvYYU+Q5bXMjs9eZEGkNQroLkn7j11s2k1Q==",
|
"integrity": "sha512-XjCY1SiSEi1T7iSYuxS82ft85kwDJUS7wj1Z0eGVXKdtr5g4xnVcbjwxhq5xBnpK/E7x1VZZoJDxpjAOasHT4Q==",
|
||||||
"hasInstallScript": true,
|
"hasInstallScript": true,
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@puppeteer/browsers": "2.8.0",
|
"@puppeteer/browsers": "2.3.0",
|
||||||
"chromium-bidi": "2.1.2",
|
|
||||||
"cosmiconfig": "^9.0.0",
|
"cosmiconfig": "^9.0.0",
|
||||||
"devtools-protocol": "0.0.1413902",
|
"devtools-protocol": "0.0.1312386",
|
||||||
"puppeteer-core": "24.4.0",
|
"puppeteer-core": "22.15.0"
|
||||||
"typed-query-selector": "^2.12.0"
|
|
||||||
},
|
},
|
||||||
"bin": {
|
"bin": {
|
||||||
"puppeteer": "lib/cjs/puppeteer/node/cli.js"
|
"puppeteer": "lib/esm/puppeteer/node/cli.js"
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=18"
|
"node": ">=18"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/puppeteer-core": {
|
"node_modules/puppeteer-core": {
|
||||||
"version": "24.4.0",
|
"version": "22.15.0",
|
||||||
"resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.4.0.tgz",
|
"resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.15.0.tgz",
|
||||||
"integrity": "sha512-eFw66gCnWo0X8Hyf9KxxJtms7a61NJVMiSaWfItsFPzFBsjsWdmcNlBdsA1WVwln6neoHhsG+uTVesKmTREn/g==",
|
"integrity": "sha512-cHArnywCiAAVXa3t4GGL2vttNxh7GqXtIYGym99egkNJ3oG//wL9LkvO4WE8W1TJe95t1F1ocu9X4xWaGsOKOA==",
|
||||||
"license": "Apache-2.0",
|
"license": "Apache-2.0",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@puppeteer/browsers": "2.8.0",
|
"@puppeteer/browsers": "2.3.0",
|
||||||
"chromium-bidi": "2.1.2",
|
"chromium-bidi": "0.6.3",
|
||||||
"debug": "^4.4.0",
|
"debug": "^4.3.6",
|
||||||
"devtools-protocol": "0.0.1413902",
|
"devtools-protocol": "0.0.1312386",
|
||||||
"typed-query-selector": "^2.12.0",
|
"ws": "^8.18.0"
|
||||||
"ws": "^8.18.1"
|
|
||||||
},
|
},
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=18"
|
"node": ">=18"
|
||||||
@ -1718,25 +1788,54 @@
|
|||||||
"b4a": "^1.6.4"
|
"b4a": "^1.6.4"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
"node_modules/through": {
|
||||||
|
"version": "2.3.8",
|
||||||
|
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
|
||||||
|
"integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
"node_modules/tslib": {
|
"node_modules/tslib": {
|
||||||
"version": "2.8.1",
|
"version": "2.8.1",
|
||||||
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
|
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
|
||||||
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
|
||||||
"license": "0BSD"
|
"license": "0BSD"
|
||||||
},
|
},
|
||||||
"node_modules/typed-query-selector": {
|
"node_modules/unbzip2-stream": {
|
||||||
"version": "2.12.0",
|
"version": "1.4.3",
|
||||||
"resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.0.tgz",
|
"resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz",
|
||||||
"integrity": "sha512-SbklCd1F0EiZOyPiW192rrHZzZ5sBijB6xM+cpmrwDqObvdtunOHHIk9fCGsoK5JVIYXoyEp4iEdE3upFH3PAg==",
|
"integrity": "sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==",
|
||||||
"license": "MIT"
|
"license": "MIT",
|
||||||
|
"dependencies": {
|
||||||
|
"buffer": "^5.2.1",
|
||||||
|
"through": "^2.3.8"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"node_modules/undici-types": {
|
"node_modules/undici-types": {
|
||||||
"version": "6.20.0",
|
"version": "6.21.0",
|
||||||
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz",
|
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
|
||||||
"integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==",
|
"integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"optional": true
|
"optional": true
|
||||||
},
|
},
|
||||||
|
"node_modules/urlpattern-polyfill": {
|
||||||
|
"version": "10.0.0",
|
||||||
|
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
|
||||||
|
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==",
|
||||||
|
"license": "MIT"
|
||||||
|
},
|
||||||
|
"node_modules/uuid": {
|
||||||
|
"version": "9.0.1",
|
||||||
|
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
|
||||||
|
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
|
||||||
|
"funding": [
|
||||||
|
"https://github.com/sponsors/broofa",
|
||||||
|
"https://github.com/sponsors/ctavan"
|
||||||
|
],
|
||||||
|
"license": "MIT",
|
||||||
|
"bin": {
|
||||||
|
"uuid": "dist/bin/uuid"
|
||||||
|
}
|
||||||
|
},
|
||||||
"node_modules/wrap-ansi": {
|
"node_modules/wrap-ansi": {
|
||||||
"version": "7.0.0",
|
"version": "7.0.0",
|
||||||
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
|
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
|
||||||
@ -1761,9 +1860,9 @@
|
|||||||
"license": "ISC"
|
"license": "ISC"
|
||||||
},
|
},
|
||||||
"node_modules/ws": {
|
"node_modules/ws": {
|
||||||
"version": "8.18.1",
|
"version": "8.18.2",
|
||||||
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.1.tgz",
|
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz",
|
||||||
"integrity": "sha512-RKW2aJZMXeMxVpnZ6bck+RswznaxmzdULiBr6KY7XkTnW8uvt0iT9H5DkHUChXrc+uurzwa0rVI16n/Xzjdz1w==",
|
"integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"engines": {
|
"engines": {
|
||||||
"node": ">=10.0.0"
|
"node": ">=10.0.0"
|
||||||
@ -1828,9 +1927,9 @@
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"node_modules/zod": {
|
"node_modules/zod": {
|
||||||
"version": "3.24.2",
|
"version": "3.23.8",
|
||||||
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz",
|
"resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz",
|
||||||
"integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==",
|
"integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==",
|
||||||
"license": "MIT",
|
"license": "MIT",
|
||||||
"funding": {
|
"funding": {
|
||||||
"url": "https://github.com/sponsors/colinhacks"
|
"url": "https://github.com/sponsors/colinhacks"
|
||||||
|
|||||||
22
package.json
22
package.json
@ -1,18 +1,24 @@
|
|||||||
{
|
{
|
||||||
"name": "sunnymh-scrap",
|
"name": "sunnymh-scraper",
|
||||||
"version": "1.0.0",
|
"version": "1.0.0",
|
||||||
"main": "index.cjs",
|
"description": "Manga scraper",
|
||||||
|
"main": "src/index.js",
|
||||||
"scripts": {
|
"scripts": {
|
||||||
"start": "node index.cjs",
|
"start": "node src/index.js",
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
},
|
},
|
||||||
|
"keywords": [
|
||||||
|
"manga",
|
||||||
|
"scraper",
|
||||||
|
"web-scraping"
|
||||||
|
],
|
||||||
"author": "",
|
"author": "",
|
||||||
"license": "ISC",
|
"license": "ISC",
|
||||||
"description": "",
|
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"axios": "^1.8.4",
|
"axios": "^1.6.7",
|
||||||
"fs": "^0.0.1-security",
|
"dotenv": "^16.5.0",
|
||||||
"puppeteer": "^24.4.0",
|
"puppeteer": "^22.0.0",
|
||||||
"sharp": "^0.33.5"
|
"sharp": "^0.33.2",
|
||||||
|
"uuid": "^9.0.1"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
81
scraper.js
81
scraper.js
@ -1,81 +0,0 @@
|
|||||||
const puppeteer = require('puppeteer');
|
|
||||||
|
|
||||||
exports.BaseScraper = class BaseScraper {
|
|
||||||
constructor() {
|
|
||||||
this.browser = null;
|
|
||||||
this.pages = [];
|
|
||||||
this.pages_response = [];
|
|
||||||
}
|
|
||||||
|
|
||||||
async init() {
|
|
||||||
this.browser = await puppeteer.launch({
|
|
||||||
// headless: false,
|
|
||||||
args: ['--no-sandbox', '--disable-setuid-sandbox']
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
async openPage(url) {
|
|
||||||
const page = await this.browser.newPage();
|
|
||||||
|
|
||||||
page.on('response', async (response) => {
|
|
||||||
const pageIndex = this.pages.indexOf(page);
|
|
||||||
|
|
||||||
if (response.url().includes('https://res.colamanga.com')) {
|
|
||||||
this.pages_response[pageIndex] = await response.buffer();
|
|
||||||
}
|
|
||||||
else if (/blob:https:\/\/www\.colamanga\.com\//.test(response.url())) {
|
|
||||||
if (!this.pages_response[pageIndex]) {
|
|
||||||
this.pages_response[pageIndex] = {};
|
|
||||||
}
|
|
||||||
this.pages_response[pageIndex][response.url()] = await response.buffer();
|
|
||||||
}
|
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
await page.goto(url, { waitUntil: 'domcontentloaded' });
|
|
||||||
this.pages.push(page);
|
|
||||||
return page;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
async closePage(page) {
|
|
||||||
const pageIndex = this.pages.indexOf(page);
|
|
||||||
if (pageIndex !== -1) {
|
|
||||||
this.pages.splice(pageIndex, 1);
|
|
||||||
this.pages_response.splice(pageIndex, 1);
|
|
||||||
}
|
|
||||||
await page.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
async closeAllPages() {
|
|
||||||
await Promise.all(this.pages.map(page => page.close()));
|
|
||||||
this.pages = [];
|
|
||||||
this.pages_response = [];
|
|
||||||
await this.browser.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
async loadPage(url) {
|
|
||||||
const page = await this.openPage(url);
|
|
||||||
return page;
|
|
||||||
}
|
|
||||||
|
|
||||||
async scrollPage(page) {
|
|
||||||
await page.evaluate(async () => {
|
|
||||||
window.scrollTo(0, 0);
|
|
||||||
await new Promise((resolve) => {
|
|
||||||
const distance = 100; // distance to scroll
|
|
||||||
const delay = 100; // delay between scrolls
|
|
||||||
const scrollInterval = setInterval(() => {
|
|
||||||
window.scrollBy(0, distance);
|
|
||||||
if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight) {
|
|
||||||
clearInterval(scrollInterval);
|
|
||||||
resolve();
|
|
||||||
}
|
|
||||||
}, delay);
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
81
src/config/config.js
Normal file
81
src/config/config.js
Normal file
@ -0,0 +1,81 @@
|
|||||||
|
/**
|
||||||
|
* Configuration module for the manga scraper
|
||||||
|
*/
|
||||||
|
class Config {
    constructor() {
        this.loadConfig();
    }

    /**
     * Load configuration from environment variables with defaults.
     *
     * Numeric settings fall back to a default when the variable is unset or
     * is not a valid integer (previously they became NaN):
     *   SCRAPER_TIMEOUT=30000, SCRAPER_RETRY_ATTEMPTS=3, SCRAPER_RETRY_DELAY=1000,
     *   IMAGE_QUALITY=80, MAX_CONCURRENT_DOWNLOADS=1, DOWNLOAD_DELAY=0.
     *
     * MANGA_BASE_DIR, API_URL and MANGA_URLS_PATH have no default and stay
     * `undefined` when unset.
     */
    loadConfig() {
        // Parse an integer env var, using `fallback` when missing or invalid.
        const intFromEnv = (name, fallback) => {
            const parsed = Number.parseInt(process.env[name], 10);
            return Number.isNaN(parsed) ? fallback : parsed;
        };

        this.baseDir = process.env.MANGA_BASE_DIR;
        this.apiBaseUrl = process.env.API_URL;
        this.mangaUrlsPath = process.env.MANGA_URLS_PATH;

        // API endpoints
        // NOTE: when API_URL is unset these URLs start with the literal
        // text "undefined"; there is no sensible default to substitute.
        this.endpoints = {
            insertMangaInfo: `${this.apiBaseUrl}/manga/insertMangaInfo`,
            insertMangaChapter: `${this.apiBaseUrl}/manga/insertMangaChapter`,
            insertMangaGenres: `${this.apiBaseUrl}/manga/insertMangaGenres`,
            insertGenre: `${this.apiBaseUrl}/genre/insertGenre`,
            getGenreId: `${this.apiBaseUrl}/genre/getGenreId`,
            getMangaId: `${this.apiBaseUrl}/manga/getMangaId`
        };

        // Scraper settings
        this.scraper = {
            // Only the exact string 'true' enables headless mode.
            headless: process.env.SCRAPER_HEADLESS === 'true',
            timeout: intFromEnv('SCRAPER_TIMEOUT', 30000),
            retryAttempts: intFromEnv('SCRAPER_RETRY_ATTEMPTS', 3),
            retryDelay: intFromEnv('SCRAPER_RETRY_DELAY', 1000)
        };

        // Download settings
        this.download = {
            imageQuality: intFromEnv('IMAGE_QUALITY', 80),
            maxConcurrentDownloads: intFromEnv('MAX_CONCURRENT_DOWNLOADS', 1),
            downloadDelay: intFromEnv('DOWNLOAD_DELAY', 0)
        };
    }

    /**
     * Get API endpoint URL
     * @param {string} endpoint - Endpoint name
     * @returns {string} Full API URL
     * @throws {Error} When `endpoint` is not a known endpoint name
     */
    getApiUrl(endpoint) {
        // Own-property check: inherited names such as "toString" are not endpoints.
        if (!Object.prototype.hasOwnProperty.call(this.endpoints, endpoint)) {
            throw new Error(`Unknown API endpoint: ${endpoint}`);
        }
        return this.endpoints[endpoint];
    }

    /**
     * Get scraper setting
     * @param {string} setting - Setting name
     * @returns {any} Setting value
     * @throws {Error} When `setting` is not a known scraper setting
     */
    getScraperSetting(setting) {
        // `in` would also accept inherited keys like "constructor".
        if (!Object.prototype.hasOwnProperty.call(this.scraper, setting)) {
            throw new Error(`Unknown scraper setting: ${setting}`);
        }
        return this.scraper[setting];
    }

    /**
     * Get download setting
     * @param {string} setting - Setting name
     * @returns {any} Setting value
     * @throws {Error} When `setting` is not a known download setting
     */
    getDownloadSetting(setting) {
        // `in` would also accept inherited keys like "constructor".
        if (!Object.prototype.hasOwnProperty.call(this.download, setting)) {
            throw new Error(`Unknown download setting: ${setting}`);
        }
        return this.download[setting];
    }
}
|
||||||
|
|
||||||
|
// Export singleton instance
|
||||||
|
module.exports = new Config();
|
||||||
134
src/downloaders/MangaDownloader.js
Normal file
134
src/downloaders/MangaDownloader.js
Normal file
@ -0,0 +1,134 @@
|
|||||||
|
const fs = require('fs').promises;
|
||||||
|
const path = require('path');
|
||||||
|
const helpers = require('../utils/helpers');
|
||||||
|
const ColaMangaScraper = require('../scrapers/ColaMangaScraper');
|
||||||
|
const MangaPoster = require('../posters/MangaPoster');
|
||||||
|
const config = require('../config/config');
|
||||||
|
|
||||||
|
/**
 * Main class for downloading manga.
 *
 * Reads a `{ name: url }` map, scrapes every entry with ColaMangaScraper,
 * writes the cover and chapter images below `config.baseDir`, and registers
 * the manga, its genres and its chapters through MangaPoster.
 */
class MangaDownloader {
    constructor() {
        this.outputDir = config.baseDir;
        this.maxConcurrentManga = config.getDownloadSetting('maxConcurrentDownloads');
        this.mangaPoster = new MangaPoster();
    }

    /**
     * Initialize the downloader: load the manga URL map and start the scraper.
     * @returns {Promise<void>}
     */
    async init_colamanga_scraper() {
        this.mangaUrls = await helpers.readJsonFile(config.mangaUrlsPath) || {};
        this.scraper = new ColaMangaScraper();
        await this.scraper.init();
    }

    /**
     * Download a single manga, one chapter at a time.
     * @param {string} mangaName - Manga name (also used as the output directory name)
     * @param {string} mangaUrl - Manga URL
     * @returns {Promise<void>}
     * @throws {Error} Wrapping any failure; the original error is kept as `cause`
     */
    async downloadManga(mangaName, mangaUrl) {
        console.log(`Getting Manga: ${mangaName} info`);

        try {
            const mangaDetails = await this.scraper.getMangaInfo(mangaUrl);

            const mangaDir = path.join(this.outputDir, mangaName);
            await helpers.ensureDirectory(mangaDir);

            // Save cover image once; an existing cover.jpg is never overwritten.
            const coverPath = path.join(mangaDir, 'cover.jpg');
            if (mangaDetails.coverPic && !(await helpers.fileExists(coverPath))) {
                await fs.writeFile(coverPath, mangaDetails.coverPic);
            }
            // Drop the buffer reference so it can be collected during the long chapter loop.
            mangaDetails.coverPic = null;

            // `coverPic` is pulled out only to keep it out of the API payload.
            const { chapters, coverPic, genres, ...mangaInfo } = mangaDetails;

            const lookup = await this.mangaPoster.getMangaId(mangaDetails.mangaName);
            let mangaId;
            if (lookup.success === true) {
                mangaId = lookup.mangaId;
            } else {
                mangaId = helpers.generateId();
                // Awaited on purpose: the manga row must exist before genres and
                // chapters reference it, and a rejection must fail this manga
                // instead of surfacing as an unhandled rejection.
                await this.mangaPoster.insertMangaInfo({ mangaId, ...mangaInfo });
                await this.mangaPoster.insertMangaGenres(mangaId, genres);
            }

            // Download chapters in sequence to manage memory
            for (const chapter of chapters) {
                const chapterDir = path.join(mangaDir, chapter.chapterName);

                // NOTE(review): any non-empty directory counts as "done", so a
                // chapter that failed half-way is skipped on the next run.
                if (await helpers.isDirectoryNotEmpty(chapterDir)) {
                    console.log(`Skipping Manga: ${mangaDetails.mangaName}, Chapter: ${chapter.order} - ${chapter.chapterName} as it already exists`);
                    continue;
                }

                console.log(`Downloading Manga: ${mangaDetails.mangaName}, Chapter: ${chapter.order} - ${chapter.chapterName}`);
                await helpers.ensureDirectory(chapterDir);
                await this.scraper.downloadChapterPics(chapter, chapterDir);
                await this.mangaPoster.insertMangaChapter({
                    mangaId,
                    chapterName: chapter.chapterName,
                    chapterOrder: chapter.order
                });
                await this.scraper.cleanup();

                // Add smart delay between chapter downloads
                const delay = helpers.getSmartDelay();
                console.log(`Waiting ${helpers.formatDelay(delay)} before next chapter...`);
                await new Promise(resolve => setTimeout(resolve, delay));
            }

            console.log(`Completed downloading: ${mangaName}`);
        } catch (error) {
            throw new Error(`Failed to download manga ${mangaName}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Run the downloader over every configured manga, sequentially.
     * A failing manga is recorded and logged; the remaining ones still run.
     * @returns {Promise<{successful: string[], failed: {name: string, error: string}[]}>} Download results
     * @throws {Error} If called before init_colamanga_scraper()
     */
    async run() {
        if (!this.scraper) {
            throw new Error('MangaDownloader.run() called before init_colamanga_scraper()');
        }

        const results = {
            successful: [],
            failed: []
        };

        try {
            // Process manga sequentially to manage memory
            for (const [mangaName, mangaUrl] of Object.entries(this.mangaUrls ?? {})) {
                try {
                    await this.downloadManga(mangaName, mangaUrl);
                    results.successful.push(mangaName);
                } catch (error) {
                    console.error(error.message);
                    results.failed.push({ name: mangaName, error: error.message });
                } finally {
                    // Clean up after each manga, also when it failed. A cleanup
                    // problem is logged only, so it cannot turn an already
                    // recorded success into a second, contradictory entry.
                    try {
                        await this.scraper.cleanup();
                    } catch (cleanupError) {
                        console.error(`Cleanup after ${mangaName} failed: ${cleanupError.message}`);
                    }

                    // Force garbage collection (only available with --expose-gc)
                    if (global.gc) {
                        global.gc();
                    }
                }
            }
        } finally {
            await this.scraper.close();
        }

        return results;
    }
}
|
||||||
|
|
||||||
|
module.exports = MangaDownloader;
|
||||||
24
src/index.js
Normal file
24
src/index.js
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
require('dotenv').config();
|
||||||
|
const MangaDownloader = require('./downloaders/MangaDownloader');
|
||||||
|
|
||||||
|
/**
 * Main entry point for the manga scraper.
 *
 * Runs the downloader, prints a summary including every failed manga with
 * its error message, and exits with code 1 when anything failed.
 * @returns {Promise<void>}
 */
async function main() {
    try {
        const downloader = new MangaDownloader();
        await downloader.init_colamanga_scraper();
        const results = await downloader.run();

        console.log(`Finished: ${results.successful.length} succeeded, ${results.failed.length} failed`);
        // Without this the only trace of a failed manga would be the exit code.
        for (const { name, error } of results.failed) {
            console.error(`Failed manga ${name}: ${error}`);
        }

        // Exit with error code if any downloads failed
        if (results.failed.length > 0) {
            process.exit(1);
        }
    } catch (error) {
        console.error('Fatal error:', error);
        process.exit(1);
    }
}
|
||||||
|
|
||||||
|
// Run the main function
|
||||||
|
main();
|
||||||
72
src/posters/MangaPoster.js
Normal file
72
src/posters/MangaPoster.js
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
const config = require('../config/config');
|
||||||
|
|
||||||
|
/**
 * Small HTTP client for the manga backend.
 *
 * Endpoint URLs are resolved with `config.getApiUrl(name)`. Unless noted
 * otherwise every method resolves to the parsed JSON body of the response;
 * HTTP status codes are not inspected.
 */
class MangaPoster {
    /**
     * GET `<endpoint>/<segment>` and parse the JSON body.
     * @param {string} endpoint - Name passed to config.getApiUrl
     * @param {string} segment - Raw value appended as one path segment
     * @returns {Promise<any>} Parsed response body
     */
    async #getJson(endpoint, segment) {
        // Names are free text (may contain "/", "?", "#", "%"); encode them
        // so they stay a single path segment instead of changing the route.
        const url = `${config.getApiUrl(endpoint)}/${encodeURIComponent(segment)}`;
        const response = await fetch(url, {
            method: 'GET',
        });
        return response.json();
    }

    /**
     * POST `payload` as JSON to `endpoint`.
     * @param {string} endpoint - Name passed to config.getApiUrl
     * @param {Object} payload - Request body
     * @returns {Promise<Response>} The raw fetch response
     */
    async #post(endpoint, payload) {
        return fetch(config.getApiUrl(endpoint), {
            method: 'POST',
            headers: {
                'Content-Type': 'application/json'
            },
            body: JSON.stringify(payload)
        });
    }

    /**
     * Look up a manga by name.
     * @param {string} mangaName - Manga name
     * @returns {Promise<Object>} API response (callers read `success` and `mangaId`)
     */
    async getMangaId(mangaName) {
        return this.#getJson('getMangaId', mangaName);
    }

    /**
     * Look up a genre by name.
     * @param {string} genreName - Genre name
     * @returns {Promise<Object>} API response (callers read `success` and `genreId`)
     */
    async getGenreId(genreName) {
        return this.#getJson('getGenreId', genreName);
    }

    /**
     * Create a manga record.
     * @param {Object} mangaInfo - Manga fields, including `mangaId`
     * @returns {Promise<Object>} API response
     */
    async insertMangaInfo(mangaInfo) {
        const response = await this.#post('insertMangaInfo', mangaInfo);
        return response.json();
    }

    /**
     * Link a manga to each of its genres, creating genres the API does not know yet.
     * A genre for which no id can be obtained is logged and skipped.
     * @param {string} mangaId - Manga id
     * @param {string[]} genres - Genre names
     * @returns {Promise<void>}
     */
    async insertMangaGenres(mangaId, genres) {
        for (const genre of genres) {
            let response = await this.getGenreId(genre);
            if (response.success === false) {
                response = await this.insertGenre({ genreName: genre });
            }

            const genreId = response.genreId;
            if (genreId == null) {
                // Posting a link without genreId would send `{ mangaId }` only.
                console.error(`Could not resolve genre "${genre}" for manga ${mangaId}; skipping`);
                continue;
            }

            // The body of this response is not read, matching the previous behaviour.
            await this.#post('insertMangaGenres', { mangaId, genreId });
        }
    }

    /**
     * Create a genre.
     * @param {{genreName: string}} genre - Genre payload
     * @returns {Promise<Object>} API response (callers read `genreId`)
     */
    async insertGenre(genre) {
        const response = await this.#post('insertGenre', genre);
        return response.json();
    }

    /**
     * Create a chapter record.
     * @param {{mangaId: string, chapterName: string, chapterOrder: number}} mangaChapter - Chapter payload
     * @returns {Promise<Object>} API response
     */
    async insertMangaChapter(mangaChapter) {
        const response = await this.#post('insertMangaChapter', mangaChapter);
        return response.json();
    }
}
|
||||||
|
|
||||||
|
module.exports = MangaPoster;
|
||||||
97
src/scrapers/BaseScraper.js
Normal file
97
src/scrapers/BaseScraper.js
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
const puppeteer = require('puppeteer');
|
||||||
|
const config = require('../config/config');
|
||||||
|
|
||||||
|
/**
 * Base class for manga scrapers
 * Provides common functionality for browser management and page handling.
 *
 * Intercepted response bodies are kept in `this.page_response`, written as
 * plain properties: the key is `'cover'` for cover pictures and the response
 * URL for everything else.
 */
class BaseScraper {
    constructor() {
        this.browser = null;
        this.page = null;
        this.page_response = {};
    }

    /**
     * Initialize the browser and install the response listener.
     * @returns {Promise<void>}
     */
    async init() {
        this.browser = await puppeteer.launch({
            headless: config.getScraperSetting('headless'),
            args: ['--no-sandbox', '--disable-setuid-sandbox']
        });
        // Reuse the first open tab; open one if the browser started without any.
        const [firstPage] = await this.browser.pages();
        this.page = firstPage ?? await this.browser.newPage();

        // Set up response interception
        this.page.on('response', async (response) => {
            // The emitter ignores the promise returned by this listener, so
            // every failure has to be handled here; otherwise a body that can
            // no longer be read becomes an unhandled rejection.
            try {
                if (!this.shouldInterceptResponse(response)) {
                    return;
                }
                const key = this.isCoverPic(response) ? 'cover' : response.url();
                this.page_response[key] = await response.buffer();
            } catch (error) {
                console.warn(`Could not capture response ${response.url()}: ${error.message}`);
            }
        });
        console.log("Browser initialized");
    }

    /**
     * Check if a response should be intercepted
     * @param {Response} response - Puppeteer response object
     * @returns {boolean} Whether to intercept the response
     */
    shouldInterceptResponse(response) {
        return false; // Override in child classes
    }

    /**
     * Check if an intercepted response is the manga cover picture
     * @param {Response} response - Puppeteer response object
     * @returns {boolean} Whether the body is stored under the 'cover' key
     */
    isCoverPic(response) {
        return false; // Override in child classes
    }

    /**
     * Navigate to a URL
     * @param {string} url - URL to navigate to
     * @returns {Promise<void>}
     */
    async navigateTo(url) {
        await this.page.goto(url, { waitUntil: 'domcontentloaded' });
    }

    /**
     * Close the page and the browser and drop all captured responses.
     * The browser is closed even when closing the page throws.
     * @returns {Promise<void>}
     */
    async close() {
        try {
            if (this.page) {
                await this.page.close();
            }
        } finally {
            this.page = null;
            try {
                if (this.browser) {
                    await this.browser.close();
                }
            } finally {
                this.browser = null;
                this.page_response = {};
            }
        }
    }

    /**
     * Scroll the page to the bottom in 100px steps every 100ms
     * @returns {Promise<void>}
     */
    async scrollPage() {
        await this.page.evaluate(async () => {
            window.scrollTo(0, 0);

            await new Promise((resolve) => {
                const distance = 100;
                const delay = 100;
                const scrollInterval = setInterval(() => {
                    window.scrollBy(0, distance);
                    // Done once the viewport bottom reaches the body height.
                    if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight) {
                        clearInterval(scrollInterval);
                        resolve();
                    }
                }, delay);
            });
        });
    }
}
|
||||||
|
|
||||||
|
module.exports = BaseScraper;
|
||||||
258
src/scrapers/ColaMangaScraper.js
Normal file
258
src/scrapers/ColaMangaScraper.js
Normal file
@ -0,0 +1,258 @@
|
|||||||
|
const BaseScraper = require('./BaseScraper');
|
||||||
|
const sharp = require('sharp');
|
||||||
|
const fs = require('fs').promises;
|
||||||
|
const path = require('path');
|
||||||
|
const config = require('../config/config');
|
||||||
|
|
||||||
|
/**
 * Scraper implementation for colamanga.com
 */
class ColaMangaScraper extends BaseScraper {
    constructor() {
        super();
        // Plain object on purpose. The response listener installed by
        // BaseScraper.init() stores buffers as properties
        // (`page_response[key] = buffer`) and this class reads them the same
        // way, so a Map's clear() would never release them. Resetting means
        // assigning a fresh object.
        this.page_response = {};
        this.maxConcurrentDownloads = config.getDownloadSetting('maxConcurrentDownloads');
        this.downloadQueue = [];
    }

    /**
     * Check if a response should be intercepted
     * @param {Response} response - Puppeteer response object
     * @returns {boolean} Whether to intercept the response
     */
    shouldInterceptResponse(response) {
        const url = response.url();
        return url.includes('https://res.colamanga.com') ||
            /blob:https:\/\/www\.colamanga\.com\//.test(url);
    }

    /**
     * Check if a response is treated as the cover picture
     * @param {Response} response - Puppeteer response object
     * @returns {boolean} True for every response served from res.colamanga.com
     */
    isCoverPic(response) {
        return response.url().includes('https://res.colamanga.com');
    }

    /**
     * Extract basic manga information from the page
     * @returns {Promise<Object>} Manga info with the name filled in and all other fields empty
     */
    async extractBasicInfo() {
        const mangaName = await this.page.$eval('.fed-deta-content h1', el => el.textContent.trim());
        return {
            mangaName: mangaName,
            mangaAuthor: '',
            mangaStatus: '',
            mangaNickname: [],
            genres: [],
            chapters: []
        };
    }

    /**
     * Extract detailed manga information
     * @param {Object} mangaInfo - Basic manga info to extend (mutated in place)
     * @returns {Promise<void>}
     */
    async extractDetailedInfo(mangaInfo) {
        const elements = await this.page.$$('.fed-deta-content li');
        // Label text of each <li> -> field of mangaInfo it fills.
        const infoMap = {
            '状态': 'mangaStatus',
            '作者': 'mangaAuthor',
            '别名': 'mangaNickname',
            '类别': 'genres'
        };

        for (const el of elements) {
            const span = await this.page.evaluate(e => e.querySelector('span')?.textContent.trim(), el);
            const field = infoMap[span];
            if (!field) continue;

            if (field === 'mangaNickname' || field === 'genres') {
                mangaInfo[field] = await this.page.evaluate(e => {
                    return Array.from(e.querySelectorAll('a')).map(a => a.textContent.trim());
                }, el);
            } else {
                // Keep the empty-string default when the row has no link, so
                // the field is not dropped when the info is serialized.
                mangaInfo[field] = await this.page.evaluate(e => e.querySelector('a')?.textContent.trim(), el) ?? '';
            }
        }
    }

    /**
     * Extract chapter information from the page
     * @param {string} mangaUrl - Base manga URL used to resolve chapter links
     * @returns {Promise<Array>} Chapter objects `{ chapterName, url, order }`, order starting at 1
     */
    async extractChapterInfo(mangaUrl) {
        const chapterElements = await this.page.$$('.all_data_list li');
        // Order is assigned over the reversed list
        // (presumably the page lists the newest chapter first - TODO confirm).
        chapterElements.reverse();
        return Promise.all(chapterElements.map(async (el, index) => {
            const link = await el.$eval('a', a => ({
                name: a.textContent.trim(),
                href: a.getAttribute('href')
            }));
            return {
                chapterName: link.name,
                url: new URL(link.href, mangaUrl).href,
                order: index + 1
            };
        }));
    }

    /**
     * Wait for cover picture to be loaded
     * @returns {Promise<Buffer>} Cover image buffer
     * @throws {Error} When no cover arrived within the scraper `timeout` setting (ms)
     */
    async waitForCoverPicture() {
        const maxWaitTime = config.getScraperSetting('timeout');
        const startTime = Date.now();

        while (Date.now() - startTime < maxWaitTime) {
            if (this.page_response['cover']) {
                return this.page_response['cover'];
            }
            await new Promise(resolve => setTimeout(resolve, config.getDownloadSetting('downloadDelay')));
        }
        throw new Error('Timeout waiting for cover picture');
    }

    /**
     * Wait until the body of an intercepted image response is available
     * @param {string} imgSrc - Image URL used as key in page_response
     * @returns {Promise<Buffer>} Image buffer
     * @throws {Error} When the image did not arrive within the scraper `timeout` setting (ms)
     */
    async waitForImageBuffer(imgSrc) {
        const deadline = Date.now() + config.getScraperSetting('timeout');
        while (!this.page_response[imgSrc]) {
            if (Date.now() >= deadline) {
                throw new Error(`Timeout waiting for image ${imgSrc}`);
            }
            await new Promise(resolve => setTimeout(resolve, 100));
        }
        return this.page_response[imgSrc];
    }

    /**
     * Get complete manga information
     * @param {string} mangaUrl - Manga URL
     * @returns {Promise<Object>} Complete manga information, including `chapters` and `coverPic`
     * @throws {Error} Wrapping any failure; the original error is kept as `cause`
     */
    async getMangaInfo(mangaUrl) {
        try {
            // Forget a cover captured for a previous manga; otherwise
            // waitForCoverPicture() would return it immediately.
            delete this.page_response['cover'];

            await this.navigateTo(mangaUrl);
            await this.page.waitForSelector('.fed-deta-info', { visible: true });

            const mangaInfo = await this.extractBasicInfo();
            await this.extractDetailedInfo(mangaInfo);
            mangaInfo.chapters = await this.extractChapterInfo(mangaUrl);
            mangaInfo.coverPic = await this.waitForCoverPicture();
            return mangaInfo;
        } catch (error) {
            throw new Error(`Failed to get manga info: ${error.message}`, { cause: error });
        }
    }

    /**
     * Download chapter images
     * @param {Object} chapter - Chapter information (`url`, `chapterName`)
     * @param {string} chapterDir - Chapter directory
     * @returns {Promise<void>}
     * @throws {Error} Wrapping any failure; the original error is kept as `cause`
     */
    async downloadChapterPics(chapter, chapterDir = ".") {
        try {
            await this.navigateTo(chapter.url);
            await this.page.waitForSelector('.mh_mangalist', { visible: true });
            await this.scrollPage();
            await this.saveImages(chapterDir);
        } catch (error) {
            throw new Error(`Failed to download chapter ${chapter.chapterName}: ${error.message}`, { cause: error });
        } finally {
            // Release the captured image buffers whether or not the chapter succeeded.
            this.page_response = {};
        }
    }

    /**
     * Walk the chapter's picture containers from last to first, scroll each
     * one into view and save its image once the response body was captured.
     * @param {string} chapterDir - Chapter directory
     * @returns {Promise<number>} `p` attribute of the first saved container (0 if nothing was saved)
     * @throws {Error} When an image does not arrive in time or cannot be saved
     */
    async saveImages(chapterDir) {
        const loadingElements = await this.page.$$('.mh_comicpic');
        const reversedElements = loadingElements.reverse();

        let totalImages = 0;
        for (const element of reversedElements) {
            const pValue = await this.page.evaluate(el => el.getAttribute('p'), element);

            // Centre the container in the viewport and give the page a moment to react.
            await this.page.evaluate(el => {
                const rect = el.getBoundingClientRect();
                window.scrollTo({
                    top: window.scrollY + rect.top - (window.innerHeight / 2),
                    behavior: 'smooth'
                });
            }, element);
            await new Promise(resolve => setTimeout(resolve, 1000));

            // NOTE(review): the image is saved when there is no `.mh_loading`
            // child or when that child is rendered (offsetParent !== null).
            // Kept as found - confirm this is the intended condition.
            const loadingDiv = await element.$('.mh_loading');
            let isVisible = true;
            if (loadingDiv) {
                isVisible = await this.page.evaluate(el => {
                    return el.offsetParent !== null;
                }, loadingDiv);
            }
            if (!isVisible) continue;

            const imgDiv = await element.$('img');
            if (!imgDiv) continue; // container without an <img>: nothing to save

            if (totalImages === 0) {
                totalImages = Number(pValue);
            }
            const imgSrc = await this.page.evaluate(el => el.src, imgDiv);
            const buffer = await this.waitForImageBuffer(imgSrc);
            await this.saveImage(imgSrc, buffer, chapterDir);
        }
        return totalImages;
    }

    /**
     * Get image order from page
     * @param {string} src - Image source URL
     * @returns {Promise<string|undefined>} `p` attribute of the container holding the image,
     *   zero-padded to the digit count of the number of containers; undefined when not found
     */
    async getImgOrder(src) {
        const orders = await this.page.$$eval('.mh_comicpic', (elements, target) => {
            const width = elements.length.toString().length;
            return elements
                // Compare attribute values instead of building a selector
                // from the URL, which breaks on quotes or backslashes.
                .filter(el => Array.from(el.querySelectorAll('img'))
                    .some(img => img.getAttribute('src') === target))
                .map(el => el.getAttribute('p'))
                .filter(p => p !== null)
                .map(p => p.padStart(width, '0'));
        }, src);
        return orders[0];
    }

    /**
     * Convert a single image to WebP and save it as `<order>.webp`
     * @param {string} url - Image URL
     * @param {Buffer} buffer - Image buffer
     * @param {string} chapterDir - Chapter directory
     * @returns {Promise<boolean>} true once the file was written
     * @throws {Error} When the page order is unknown or the conversion fails
     */
    async saveImage(url, buffer, chapterDir) {
        try {
            const order = await this.getImgOrder(url);
            if (order === undefined) {
                // Would otherwise be written as "undefined.webp".
                throw new Error('could not determine page order');
            }
            const filepath = path.join(chapterDir, `${order}.webp`);

            await sharp(buffer)
                .webp({
                    quality: config.getDownloadSetting('imageQuality')
                })
                .toFile(filepath);
            return true;
        } catch (error) {
            console.error(`Failed to process image ${url}: ${error.message}`);
            throw error;
        }
    }

    /**
     * Clean up resources: drop captured responses and ask the page to collect garbage
     * @returns {Promise<void>}
     */
    async cleanup() {
        this.page_response = {};
        if (this.page) {
            await this.page.evaluate(() => {
                // window.gc only exists when the browser exposes it
                if (window.gc) {
                    window.gc();
                }
            });
        }
    }
}
|
||||||
|
|
||||||
|
module.exports = ColaMangaScraper;
|
||||||
157
src/utils/helpers.js
Normal file
157
src/utils/helpers.js
Normal file
@ -0,0 +1,157 @@
|
|||||||
|
const fs = require('fs');
|
||||||
|
const path = require('path');
|
||||||
|
const { v4: uuidv4 } = require('uuid');
|
||||||
|
|
||||||
|
/**
 * Utility functions for the manga scraper
 */
class Helpers {
    /**
     * Sanitize a string to be used as a filename
     * @param {string} str - String to sanitize
     * @returns {string} Trimmed string with reserved characters and whitespace runs replaced by `_`
     */
    static sanitizeFileName(str) {
        const invalidChars = /[<>:"/\\|?*\x00-\x1F]/g;
        // Trim first: once whitespace has become "_" there is nothing left to trim.
        return str.trim()
            .replace(invalidChars, '_')
            .replace(/\s+/g, '_');
    }

    /**
     * Generate a smart delay based on time of day
     * @param {Date} [now] - Point in time whose local hour selects the range (defaults to the current time)
     * @returns {number} Delay in milliseconds, random within the range of that hour
     */
    static getSmartDelay(now = new Date()) {
        const hour = now.getHours();
        let minDelay, maxDelay;

        // Midnight to 6 AM: 20-30 minutes
        if (hour >= 0 && hour < 6) {
            minDelay = 20 * 60 * 1000;
            maxDelay = 30 * 60 * 1000;
        }
        // 6 PM to midnight: 5-8 minutes
        else if (hour >= 18 && hour < 24) {
            minDelay = 5 * 60 * 1000;
            maxDelay = 8 * 60 * 1000;
        }
        // Day time: 10-12 minutes
        else {
            minDelay = 10 * 60 * 1000;
            maxDelay = 12 * 60 * 1000;
        }

        return minDelay + Math.random() * (maxDelay - minDelay);
    }

    /**
     * Format delay time for logging
     * @param {number} delay - Delay in milliseconds
     * @returns {string} Formatted delay string, e.g. "2 minutes and 5 seconds"
     */
    static formatDelay(delay) {
        const minutes = Math.floor(delay / 60000);
        const seconds = Math.floor((delay % 60000) / 1000);
        return `${minutes} minutes and ${seconds} seconds`;
    }

    /**
     * Generate a unique ID
     * @returns {string} UUID v4
     */
    static generateId() {
        return uuidv4();
    }

    /**
     * Ensure directory exists (parents are created as needed)
     * @param {string} dirPath - Directory path
     * @returns {Promise<void>}
     * @throws {Error} When the directory cannot be created; original error kept as `cause`
     */
    static async ensureDirectory(dirPath) {
        try {
            await fs.promises.mkdir(dirPath, { recursive: true });
        } catch (error) {
            throw new Error(`Failed to create directory ${dirPath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Read JSON file
     * @param {string} filePath - Path to JSON file
     * @returns {Promise<Object>} Parsed JSON data
     * @throws {Error} When the file cannot be read or parsed; original error kept as `cause`
     */
    static async readJsonFile(filePath) {
        try {
            const data = await fs.promises.readFile(filePath, 'utf8');
            return JSON.parse(data);
        } catch (error) {
            throw new Error(`Failed to read JSON file ${filePath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Write JSON file (2-space indented)
     * @param {string} filePath - Path to JSON file
     * @param {Object} data - Data to write
     * @returns {Promise<void>}
     * @throws {Error} When the file cannot be written; original error kept as `cause`
     */
    static async writeJsonFile(filePath, data) {
        try {
            await fs.promises.writeFile(filePath, JSON.stringify(data, null, 2));
        } catch (error) {
            throw new Error(`Failed to write JSON file ${filePath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Check if file exists
     * @param {string} filePath - Path to file
     * @returns {Promise<boolean>} false when the path cannot be accessed for any reason
     */
    static async fileExists(filePath) {
        try {
            await fs.promises.access(filePath);
            return true;
        } catch {
            return false;
        }
    }

    /**
     * Get file extension from URL
     * @param {string} url - URL to get extension from
     * @returns {string} Lower-cased extension including the dot; '.webp' when absent or the URL is invalid
     */
    static getFileExtension(url) {
        try {
            const { pathname } = new URL(url);
            const extension = path.extname(pathname).toLowerCase();
            return extension || '.webp'; // Default to .webp if no extension found
        } catch {
            return '.webp';
        }
    }

    /**
     * Check if directory exists and is not empty
     * @param {string} dirPath - Directory path to check
     * @returns {Promise<boolean>} True if directory exists and is not empty, false otherwise
     *   (also false when the path cannot be read)
     */
    static async isDirectoryNotEmpty(dirPath) {
        try {
            const stats = await fs.promises.stat(dirPath);
            if (!stats.isDirectory()) {
                return false;
            }
            const files = await fs.promises.readdir(dirPath);
            return files.length > 0;
        } catch (error) {
            // Best effort: a missing or unreadable path counts as "empty".
            return false;
        }
    }
}
|
||||||
|
|
||||||
|
module.exports = Helpers;
|
||||||
Loading…
x
Reference in New Issue
Block a user