Updated scraper for new server

This commit is contained in:
yiekheng 2025-05-14 16:20:56 +08:00
parent a3bf75bd36
commit 66b78bca1d
13 changed files with 1004 additions and 295 deletions

1
.gitignore vendored
View File

@ -1,2 +1,3 @@
/node_modules /node_modules
/test /test
.env

View File

@ -1,131 +0,0 @@
const { BaseScraper } = require('./scraper.js');
const sharp = require('sharp');
const fs = require('fs');
const path = require('path');
/**
 * Scraper for colamanga.com built on BaseScraper's page bookkeeping
 * (`this.pages` / `this.pages_response`, indexed in parallel).
 */
class ColaMangaScraper extends BaseScraper {
  constructor() {
    super();
  }

  /**
   * Encode an image buffer as WebP (quality 80) and write it to `dir/filename`.
   * Failures are logged rather than thrown so one bad image does not abort a run.
   * @param {Buffer|ArrayBuffer} buffer - Raw image bytes
   * @param {string} filename - Target file name
   * @param {string} [dir='.'] - Target directory (created if missing)
   * @returns {Promise<void>}
   */
  async saveBufferAsWebp(buffer, filename, dir = '.') {
    const dirPath = path.resolve(dir);
    const filePath = path.join(dirPath, filename);
    try {
      await fs.promises.mkdir(dirPath, { recursive: true });
      await sharp(buffer).webp({ quality: 80 }).toFormat('webp').toFile(filePath);
    } catch (error) {
      console.error(`Failed to save ${filename}:`, error);
    }
  }

  /**
   * Alias of saveBufferAsWebp. Existing callers use this name, which was
   * previously undefined and failed with "saveBufferToWebp is not a function".
   * @param {Buffer|ArrayBuffer} buffer - Raw image bytes
   * @param {string} filename - Target file name
   * @param {string} [dir='.'] - Target directory
   * @returns {Promise<void>}
   */
  async saveBufferToWebp(buffer, filename, dir = '.') {
    return this.saveBufferAsWebp(buffer, filename, dir);
  }

  /**
   * Scrape name, author, aliases, genres, status, chapter list and cover image
   * from a manga detail page.
   * @param {string} mangaUrl - URL of the manga detail page
   * @returns {Promise<Object>} Manga info; `coverPic` is undefined when no cover
   *   response was captured before the wait below expires
   */
  async getMangaInfo(mangaUrl) {
    const page = await this.loadPage(mangaUrl);
    try {
      await page.waitForSelector('.fed-deta-info', { visible: true });
      const mangaName = await page.$eval('.fed-deta-content h1', el => el.textContent);
      const mangaInfo = {
        name: mangaName,
        author: '',
        nickNames: [],
        genres: [],
        status: '',
        chapters: []
      };

      const elements = await page.$$('.fed-deta-content li');
      for (const el of elements) {
        // Labels on the page: 状态 = status, 作者 = author, 别名 = alias, 类别 = category.
        const span = await el.$eval('span', node => node.textContent.trim());
        if (span === '状态') {
          mangaInfo.status = await el.$eval('a', node => node.textContent);
        } else if (span === '作者') {
          mangaInfo.author = await el.$eval('a', node => node.textContent);
        } else if (span === '别名') {
          mangaInfo.nickNames = await el.$$eval('a', nodes => nodes.map(node => node.textContent));
        } else if (span === '类别') {
          mangaInfo.genres = await el.$$eval('a', nodes => nodes.map(node => node.textContent));
        }
      }

      const chapterElements = await page.$$('.all_data_list li');
      mangaInfo.chapters = await Promise.all(chapterElements.map(el =>
        el.$eval('a', anchor => ({
          name: anchor.textContent,
          url: anchor.getAttribute('href')
        }))
      ));

      // The cover arrives through the response listener. Poll for it, but give
      // up after 30 s instead of spinning forever when it never shows up.
      const coverDeadline = Date.now() + 30000;
      while (!this.pages_response[this.pages.indexOf(page)] && Date.now() < coverDeadline) {
        await new Promise(resolve => setTimeout(resolve, 100));
      }
      mangaInfo.coverPic = this.pages_response[this.pages.indexOf(page)];
      return mangaInfo;
    } finally {
      // Release the page even when a selector wait or evaluation throws.
      await this.closePage(page);
    }
  }

  /**
   * Download every captured image of one chapter into `chapterDir`.
   * The chapter is skipped when the directory already exists.
   * @param {{name: string, url: string}} chapter - Chapter descriptor
   * @param {string} [chapterDir='.'] - Output directory for this chapter
   * @returns {Promise<void>}
   */
  async downloadChapterPics(chapter, chapterDir = ".") {
    const directoryPath = path.resolve(chapterDir);
    if (fs.existsSync(directoryPath)) {
      console.log(`Skipping ${chapter.name} as it already exists`);
      return;
    }
    fs.mkdirSync(directoryPath, { recursive: true });

    const page = await this.loadPage(chapter.url);
    try {
      const pageIndex = this.pages.indexOf(page);
      await page.waitForSelector('.mh_mangalist', { visible: true });

      for (let attempt = 0; attempt < 10; attempt++) {
        console.log(`Downloading ${chapter.name}, attempt ${attempt + 1}`);
        await this.scrollPage(page);
        const loadingElements = await page.$$eval('.mh_loading:not([style*="display: none"])', elements => elements.length);
        await new Promise(resolve => setTimeout(resolve, 1000));
        // The slot stays undefined until the first image response is captured.
        const captured = this.pages_response[pageIndex] ?? {};
        if (loadingElements === 0 && Object.keys(captured).length !== 0) {
          break;
        }
      }

      const responses = this.pages_response[pageIndex] ?? {};
      for (const [url, response] of Object.entries(responses)) {
        const [order] = await this.getImgOrder(page, url);
        if (order === undefined) {
          console.warn(`No page position found for ${url}, skipping`);
          continue;
        }
        await this.saveBufferAsWebp(response, `${order}.webp`, chapterDir);
      }
    } finally {
      await this.closePage(page);
    }
  }

  /**
   * Download a list of chapters one after another, each into `dir/<chapter name>`.
   * @param {Array<{name: string, url: string}>} chapters - Chapters to download
   * @param {string} [dir='.'] - Base output directory
   * @returns {Promise<void>}
   */
  async downloadChapter(chapters, dir = ".") {
    const dirPath = path.resolve(dir);
    if (!fs.existsSync(dirPath)) {
      fs.mkdirSync(dirPath, { recursive: true });
    }
    for (const chapter of chapters) {
      await this.downloadChapterPics(chapter, path.join(dir, chapter.name));
    }
  }

  /**
   * Look up the `p` attribute of the `.mh_comicpic` element(s) holding the
   * image with the given `src`, zero-padded to the width of the element count.
   * @param {Object} page - Puppeteer page
   * @param {string} src - Image source URL to look for
   * @returns {Promise<string[]>} Padded position strings (empty when not found)
   */
  async getImgOrder(page, src) {
    return page.$$eval('.mh_comicpic', (elements, imgSrc) => {
      const width = String(elements.length).length;
      return elements
        .filter(el => el.querySelector(`img[src="${imgSrc}"]`))
        .map(el => el.getAttribute('p'))
        // A missing attribute would make padStart throw inside the page.
        .filter(position => position !== null)
        .map(position => position.padStart(width, '0'));
    }, src);
  }
}
// Ad-hoc driver: scrape one manga, save its cover and download every chapter.
(async () => {
  const scraper = new ColaMangaScraper();
  await scraper.init();
  try {
    const mangaUrl = 'https://www.colamanga.com/manga-od825111/';
    const mangaInfo = await scraper.getMangaInfo(mangaUrl);
    // The class defines saveBufferAsWebp; the previous call used a name that does not exist.
    await scraper.saveBufferAsWebp(mangaInfo.coverPic, 'cover.webp', 'test');
    await scraper.downloadChapter(mangaInfo.chapters, 'test');
    console.log(mangaInfo);
  } finally {
    // Always shut the browser down, otherwise a failure leaves the process hanging.
    await scraper.closeAllPages();
  }
})().catch((error) => {
  console.error('Scrape failed:', error);
  process.exitCode = 1;
});

View File

@ -1,8 +0,0 @@
const axios = require('axios');
// Placeholder payload for manually exercising the local insert endpoint.
const postData = {
  mangaId: 'yourMangaId',
  mangaName: 'yourMangaName'
};
axios.post('http://localhost:4000/insert', postData)
  .catch((error) => {
    // Report the failure instead of leaving an unhandled rejection.
    console.error('Insert request failed:', error.message);
    process.exitCode = 1;
  });

231
package-lock.json generated
View File

@ -1,18 +1,19 @@
{ {
"name": "sunnymh-scrap", "name": "sunnymh-scraper",
"version": "1.0.0", "version": "1.0.0",
"lockfileVersion": 3, "lockfileVersion": 3,
"requires": true, "requires": true,
"packages": { "packages": {
"": { "": {
"name": "sunnymh-scrap", "name": "sunnymh-scraper",
"version": "1.0.0", "version": "1.0.0",
"license": "ISC", "license": "ISC",
"dependencies": { "dependencies": {
"axios": "^1.8.4", "axios": "^1.6.7",
"fs": "^0.0.1-security", "dotenv": "^16.5.0",
"puppeteer": "^24.4.0", "puppeteer": "^22.0.0",
"sharp": "^0.33.5" "sharp": "^0.33.2",
"uuid": "^9.0.1"
} }
}, },
"node_modules/@babel/code-frame": { "node_modules/@babel/code-frame": {
@ -410,17 +411,18 @@
} }
}, },
"node_modules/@puppeteer/browsers": { "node_modules/@puppeteer/browsers": {
"version": "2.8.0", "version": "2.3.0",
"resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.8.0.tgz", "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.3.0.tgz",
"integrity": "sha512-yTwt2KWRmCQAfhvbCRjebaSX8pV1//I0Y3g+A7f/eS7gf0l4eRJoUCvcYdVtboeU4CTOZQuqYbZNS8aBYb8ROQ==", "integrity": "sha512-ioXoq9gPxkss4MYhD+SFaU9p1IHFUX0ILAWFPyjGaBdjLsYAlZw6j1iLA0N/m12uVHLFDfSYNF7EQccjinIMDA==",
"license": "Apache-2.0", "license": "Apache-2.0",
"dependencies": { "dependencies": {
"debug": "^4.4.0", "debug": "^4.3.5",
"extract-zip": "^2.0.1", "extract-zip": "^2.0.1",
"progress": "^2.0.3", "progress": "^2.0.3",
"proxy-agent": "^6.5.0", "proxy-agent": "^6.4.0",
"semver": "^7.7.1", "semver": "^7.6.3",
"tar-fs": "^3.0.8", "tar-fs": "^3.0.6",
"unbzip2-stream": "^1.4.3",
"yargs": "^17.7.2" "yargs": "^17.7.2"
}, },
"bin": { "bin": {
@ -437,13 +439,13 @@
"license": "MIT" "license": "MIT"
}, },
"node_modules/@types/node": { "node_modules/@types/node": {
"version": "22.13.13", "version": "22.15.15",
"resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.13.tgz", "resolved": "https://registry.npmjs.org/@types/node/-/node-22.15.15.tgz",
"integrity": "sha512-ClsL5nMwKaBRwPcCvH8E7+nU4GxHVx1axNvMZTFHMEfNI7oahimt26P5zjVCRrjiIWj6YFXfE1v3dEp94wLcGQ==", "integrity": "sha512-R5muMcZob3/Jjchn5LcO8jdKwSCbzqmPB6ruBxMcf9kbxtniZHP327s6C37iOfuw8mbKK3cAQa7sEl7afLrQ8A==",
"license": "MIT", "license": "MIT",
"optional": true, "optional": true,
"dependencies": { "dependencies": {
"undici-types": "~6.20.0" "undici-types": "~6.21.0"
} }
}, },
"node_modules/@types/yauzl": { "node_modules/@types/yauzl": {
@ -538,9 +540,9 @@
"optional": true "optional": true
}, },
"node_modules/bare-fs": { "node_modules/bare-fs": {
"version": "4.0.2", "version": "4.1.4",
"resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.0.2.tgz", "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.1.4.tgz",
"integrity": "sha512-S5mmkMesiduMqnz51Bfh0Et9EX0aTCJxhsI4bvzFFLs8Z1AV8RDHadfY5CyLwdoLHgXbNBEN1gQcbEtGwuvixw==", "integrity": "sha512-r8+26Voz8dGX3AYpJdFb1ZPaUSM8XOLCZvy+YGpRTmwPHIxA7Z3Jov/oMPtV7hfRQbOnH8qGlLTzQAbgtdNN0Q==",
"license": "Apache-2.0", "license": "Apache-2.0",
"optional": true, "optional": true,
"dependencies": { "dependencies": {
@ -602,6 +604,26 @@
} }
} }
}, },
"node_modules/base64-js": {
"version": "1.5.1",
"resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
"integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "MIT"
},
"node_modules/basic-ftp": { "node_modules/basic-ftp": {
"version": "5.0.5", "version": "5.0.5",
"resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz", "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz",
@ -611,6 +633,30 @@
"node": ">=10.0.0" "node": ">=10.0.0"
} }
}, },
"node_modules/buffer": {
"version": "5.7.1",
"resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz",
"integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "MIT",
"dependencies": {
"base64-js": "^1.3.1",
"ieee754": "^1.1.13"
}
},
"node_modules/buffer-crc32": { "node_modules/buffer-crc32": {
"version": "0.2.13", "version": "0.2.13",
"resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz",
@ -643,13 +689,14 @@
} }
}, },
"node_modules/chromium-bidi": { "node_modules/chromium-bidi": {
"version": "2.1.2", "version": "0.6.3",
"resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-2.1.2.tgz", "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.6.3.tgz",
"integrity": "sha512-vtRWBK2uImo5/W2oG6/cDkkHSm+2t6VHgnj+Rcwhb0pP74OoUb4GipyRX/T/y39gYQPhioP0DPShn+A7P6CHNw==", "integrity": "sha512-qXlsCmpCZJAnoTYI83Iu6EdYQpMYdVkCfq08KDh2pmlVqK5t5IA9mGs4/LwCwp4fqisSOMXZxP3HIh8w8aRn0A==",
"license": "Apache-2.0", "license": "Apache-2.0",
"dependencies": { "dependencies": {
"mitt": "^3.0.1", "mitt": "3.0.1",
"zod": "^3.24.1" "urlpattern-polyfill": "10.0.0",
"zod": "3.23.8"
}, },
"peerDependencies": { "peerDependencies": {
"devtools-protocol": "*" "devtools-protocol": "*"
@ -807,11 +854,23 @@
} }
}, },
"node_modules/devtools-protocol": { "node_modules/devtools-protocol": {
"version": "0.0.1413902", "version": "0.0.1312386",
"resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1413902.tgz", "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1312386.tgz",
"integrity": "sha512-yRtvFD8Oyk7C9Os3GmnFZLu53yAfsnyw1s+mLmHHUK0GQEc9zthHWvS1r67Zqzm5t7v56PILHIVZ7kmFMaL2yQ==", "integrity": "sha512-DPnhUXvmvKT2dFA/j7B+riVLUt9Q6RKJlcppojL5CoRywJJKLDYnRlw0gTFKfgDPHP5E04UoB71SxoJlVZy8FA==",
"license": "BSD-3-Clause" "license": "BSD-3-Clause"
}, },
"node_modules/dotenv": {
"version": "16.5.0",
"resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz",
"integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==",
"license": "BSD-2-Clause",
"engines": {
"node": ">=12"
},
"funding": {
"url": "https://dotenvx.com"
}
},
"node_modules/dunder-proto": { "node_modules/dunder-proto": {
"version": "1.0.1", "version": "1.0.1",
"resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz",
@ -1035,12 +1094,6 @@
"node": ">= 6" "node": ">= 6"
} }
}, },
"node_modules/fs": {
"version": "0.0.1-security",
"resolved": "https://registry.npmjs.org/fs/-/fs-0.0.1-security.tgz",
"integrity": "sha512-3XY9e1pP0CVEUCdj5BmfIZxRBTSDycnbqhIOGec9QYtmVH2fbLpj86CFWkrNOkt/Fvty4KZG5lTglL9j/gJ87w==",
"license": "ISC"
},
"node_modules/function-bind": { "node_modules/function-bind": {
"version": "1.1.2", "version": "1.1.2",
"resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz",
@ -1202,6 +1255,26 @@
"node": ">= 14" "node": ">= 14"
} }
}, },
"node_modules/ieee754": {
"version": "1.2.1",
"resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz",
"integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==",
"funding": [
{
"type": "github",
"url": "https://github.com/sponsors/feross"
},
{
"type": "patreon",
"url": "https://www.patreon.com/feross"
},
{
"type": "consulting",
"url": "https://feross.org/support"
}
],
"license": "BSD-3-Clause"
},
"node_modules/import-fresh": { "node_modules/import-fresh": {
"version": "3.3.1", "version": "3.3.1",
"resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz",
@ -1470,38 +1543,35 @@
} }
}, },
"node_modules/puppeteer": { "node_modules/puppeteer": {
"version": "24.4.0", "version": "22.15.0",
"resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-24.4.0.tgz", "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.15.0.tgz",
"integrity": "sha512-E4JhJzjS8AAI+6N/b+Utwarhz6zWl3+MR725fal+s3UlOlX2eWdsvYYU+Q5bXMjs9eZEGkNQroLkn7j11s2k1Q==", "integrity": "sha512-XjCY1SiSEi1T7iSYuxS82ft85kwDJUS7wj1Z0eGVXKdtr5g4xnVcbjwxhq5xBnpK/E7x1VZZoJDxpjAOasHT4Q==",
"hasInstallScript": true, "hasInstallScript": true,
"license": "Apache-2.0", "license": "Apache-2.0",
"dependencies": { "dependencies": {
"@puppeteer/browsers": "2.8.0", "@puppeteer/browsers": "2.3.0",
"chromium-bidi": "2.1.2",
"cosmiconfig": "^9.0.0", "cosmiconfig": "^9.0.0",
"devtools-protocol": "0.0.1413902", "devtools-protocol": "0.0.1312386",
"puppeteer-core": "24.4.0", "puppeteer-core": "22.15.0"
"typed-query-selector": "^2.12.0"
}, },
"bin": { "bin": {
"puppeteer": "lib/cjs/puppeteer/node/cli.js" "puppeteer": "lib/esm/puppeteer/node/cli.js"
}, },
"engines": { "engines": {
"node": ">=18" "node": ">=18"
} }
}, },
"node_modules/puppeteer-core": { "node_modules/puppeteer-core": {
"version": "24.4.0", "version": "22.15.0",
"resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.4.0.tgz", "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.15.0.tgz",
"integrity": "sha512-eFw66gCnWo0X8Hyf9KxxJtms7a61NJVMiSaWfItsFPzFBsjsWdmcNlBdsA1WVwln6neoHhsG+uTVesKmTREn/g==", "integrity": "sha512-cHArnywCiAAVXa3t4GGL2vttNxh7GqXtIYGym99egkNJ3oG//wL9LkvO4WE8W1TJe95t1F1ocu9X4xWaGsOKOA==",
"license": "Apache-2.0", "license": "Apache-2.0",
"dependencies": { "dependencies": {
"@puppeteer/browsers": "2.8.0", "@puppeteer/browsers": "2.3.0",
"chromium-bidi": "2.1.2", "chromium-bidi": "0.6.3",
"debug": "^4.4.0", "debug": "^4.3.6",
"devtools-protocol": "0.0.1413902", "devtools-protocol": "0.0.1312386",
"typed-query-selector": "^2.12.0", "ws": "^8.18.0"
"ws": "^8.18.1"
}, },
"engines": { "engines": {
"node": ">=18" "node": ">=18"
@ -1718,25 +1788,54 @@
"b4a": "^1.6.4" "b4a": "^1.6.4"
} }
}, },
"node_modules/through": {
"version": "2.3.8",
"resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz",
"integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==",
"license": "MIT"
},
"node_modules/tslib": { "node_modules/tslib": {
"version": "2.8.1", "version": "2.8.1",
"resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz",
"integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==",
"license": "0BSD" "license": "0BSD"
}, },
"node_modules/typed-query-selector": { "node_modules/unbzip2-stream": {
"version": "2.12.0", "version": "1.4.3",
"resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.0.tgz", "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz",
"integrity": "sha512-SbklCd1F0EiZOyPiW192rrHZzZ5sBijB6xM+cpmrwDqObvdtunOHHIk9fCGsoK5JVIYXoyEp4iEdE3upFH3PAg==", "integrity": "sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==",
"license": "MIT" "license": "MIT",
"dependencies": {
"buffer": "^5.2.1",
"through": "^2.3.8"
}
}, },
"node_modules/undici-types": { "node_modules/undici-types": {
"version": "6.20.0", "version": "6.21.0",
"resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz",
"integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==",
"license": "MIT", "license": "MIT",
"optional": true "optional": true
}, },
"node_modules/urlpattern-polyfill": {
"version": "10.0.0",
"resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz",
"integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==",
"license": "MIT"
},
"node_modules/uuid": {
"version": "9.0.1",
"resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz",
"integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==",
"funding": [
"https://github.com/sponsors/broofa",
"https://github.com/sponsors/ctavan"
],
"license": "MIT",
"bin": {
"uuid": "dist/bin/uuid"
}
},
"node_modules/wrap-ansi": { "node_modules/wrap-ansi": {
"version": "7.0.0", "version": "7.0.0",
"resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz",
@ -1761,9 +1860,9 @@
"license": "ISC" "license": "ISC"
}, },
"node_modules/ws": { "node_modules/ws": {
"version": "8.18.1", "version": "8.18.2",
"resolved": "https://registry.npmjs.org/ws/-/ws-8.18.1.tgz", "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz",
"integrity": "sha512-RKW2aJZMXeMxVpnZ6bck+RswznaxmzdULiBr6KY7XkTnW8uvt0iT9H5DkHUChXrc+uurzwa0rVI16n/Xzjdz1w==", "integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==",
"license": "MIT", "license": "MIT",
"engines": { "engines": {
"node": ">=10.0.0" "node": ">=10.0.0"
@ -1828,9 +1927,9 @@
} }
}, },
"node_modules/zod": { "node_modules/zod": {
"version": "3.24.2", "version": "3.23.8",
"resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz", "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz",
"integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==", "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==",
"license": "MIT", "license": "MIT",
"funding": { "funding": {
"url": "https://github.com/sponsors/colinhacks" "url": "https://github.com/sponsors/colinhacks"

View File

@ -1,18 +1,24 @@
{ {
"name": "sunnymh-scrap", "name": "sunnymh-scraper",
"version": "1.0.0", "version": "1.0.0",
"main": "index.cjs", "description": "Manga scraper",
"main": "src/index.js",
"scripts": { "scripts": {
"start": "node index.cjs", "start": "node src/index.js",
"test": "echo \"Error: no test specified\" && exit 1" "test": "echo \"Error: no test specified\" && exit 1"
}, },
"keywords": [
"manga",
"scraper",
"web-scraping"
],
"author": "", "author": "",
"license": "ISC", "license": "ISC",
"description": "",
"dependencies": { "dependencies": {
"axios": "^1.8.4", "axios": "^1.6.7",
"fs": "^0.0.1-security", "dotenv": "^16.5.0",
"puppeteer": "^24.4.0", "puppeteer": "^22.0.0",
"sharp": "^0.33.5" "sharp": "^0.33.2",
"uuid": "^9.0.1"
} }
} }

View File

@ -1,81 +0,0 @@
const puppeteer = require('puppeteer');
exports.BaseScraper = class BaseScraper {
constructor() {
this.browser = null;
this.pages = [];
this.pages_response = [];
}
async init() {
this.browser = await puppeteer.launch({
// headless: false,
args: ['--no-sandbox', '--disable-setuid-sandbox']
});
}
async openPage(url) {
const page = await this.browser.newPage();
page.on('response', async (response) => {
const pageIndex = this.pages.indexOf(page);
if (response.url().includes('https://res.colamanga.com')) {
this.pages_response[pageIndex] = await response.buffer();
}
else if (/blob:https:\/\/www\.colamanga\.com\//.test(response.url())) {
if (!this.pages_response[pageIndex]) {
this.pages_response[pageIndex] = {};
}
this.pages_response[pageIndex][response.url()] = await response.buffer();
}
});
await page.goto(url, { waitUntil: 'domcontentloaded' });
this.pages.push(page);
return page;
}
async closePage(page) {
const pageIndex = this.pages.indexOf(page);
if (pageIndex !== -1) {
this.pages.splice(pageIndex, 1);
this.pages_response.splice(pageIndex, 1);
}
await page.close();
}
async closeAllPages() {
await Promise.all(this.pages.map(page => page.close()));
this.pages = [];
this.pages_response = [];
await this.browser.close();
}
async loadPage(url) {
const page = await this.openPage(url);
return page;
}
async scrollPage(page) {
await page.evaluate(async () => {
window.scrollTo(0, 0);
await new Promise((resolve) => {
const distance = 100; // distance to scroll
const delay = 100; // delay between scrolls
const scrollInterval = setInterval(() => {
window.scrollBy(0, distance);
if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight) {
clearInterval(scrollInterval);
resolve();
}
}, delay);
});
});
}
}

81
src/config/config.js Normal file
View File

@ -0,0 +1,81 @@
/**
* Configuration module for the manga scraper
*/
class Config {
  constructor() {
    this.loadConfig();
  }

  /**
   * Parse a base-10 integer from an environment value.
   * @param {string|undefined} value - Raw environment value
   * @param {number} fallback - Value to use when `value` is missing or not numeric
   * @returns {number} Parsed integer, or `fallback`
   */
  static parseInteger(value, fallback) {
    const parsed = Number.parseInt(value, 10);
    return Number.isNaN(parsed) ? fallback : parsed;
  }

  /**
   * Load configuration from environment variables.
   *
   * Numeric settings fall back to a default when the variable is unset or not
   * a number (previously they silently became NaN). MANGA_BASE_DIR, API_URL and
   * MANGA_URLS_PATH have no fallback and stay undefined when unset.
   */
  loadConfig() {
    const { env } = process;

    this.baseDir = env.MANGA_BASE_DIR;
    this.apiBaseUrl = env.API_URL;
    this.mangaUrlsPath = env.MANGA_URLS_PATH;

    // API endpoints
    this.endpoints = {
      insertMangaInfo: `${this.apiBaseUrl}/manga/insertMangaInfo`,
      insertMangaChapter: `${this.apiBaseUrl}/manga/insertMangaChapter`,
      insertMangaGenres: `${this.apiBaseUrl}/manga/insertMangaGenres`,
      insertGenre: `${this.apiBaseUrl}/genre/insertGenre`,
      getGenreId: `${this.apiBaseUrl}/genre/getGenreId`,
      getMangaId: `${this.apiBaseUrl}/manga/getMangaId`
    };

    // Scraper settings.
    // NOTE(review): the time-based fallbacks assume milliseconds; confirm against the consumers.
    this.scraper = {
      headless: env.SCRAPER_HEADLESS === 'true',
      timeout: Config.parseInteger(env.SCRAPER_TIMEOUT, 30000),
      retryAttempts: Config.parseInteger(env.SCRAPER_RETRY_ATTEMPTS, 3),
      retryDelay: Config.parseInteger(env.SCRAPER_RETRY_DELAY, 1000)
    };

    // Download settings
    this.download = {
      imageQuality: Config.parseInteger(env.IMAGE_QUALITY, 80),
      maxConcurrentDownloads: Config.parseInteger(env.MAX_CONCURRENT_DOWNLOADS, 1),
      downloadDelay: Config.parseInteger(env.DOWNLOAD_DELAY, 1000)
    };
  }

  /**
   * Get API endpoint URL
   * @param {string} endpoint - Endpoint name
   * @returns {string} Full API URL
   * @throws {Error} When the endpoint name is not configured
   */
  getApiUrl(endpoint) {
    // Own-property check so inherited names such as "constructor" are rejected.
    if (!Object.hasOwn(this.endpoints, endpoint)) {
      throw new Error(`Unknown API endpoint: ${endpoint}`);
    }
    return this.endpoints[endpoint];
  }

  /**
   * Get scraper setting
   * @param {string} setting - Setting name
   * @returns {any} Setting value
   * @throws {Error} When the setting name is not configured
   */
  getScraperSetting(setting) {
    if (!Object.hasOwn(this.scraper, setting)) {
      throw new Error(`Unknown scraper setting: ${setting}`);
    }
    return this.scraper[setting];
  }

  /**
   * Get download setting
   * @param {string} setting - Setting name
   * @returns {any} Setting value
   * @throws {Error} When the setting name is not configured
   */
  getDownloadSetting(setting) {
    if (!Object.hasOwn(this.download, setting)) {
      throw new Error(`Unknown download setting: ${setting}`);
    }
    return this.download[setting];
  }
}
// Export singleton instance
module.exports = new Config();

View File

@ -0,0 +1,134 @@
const fs = require('fs').promises;
const path = require('path');
const helpers = require('../utils/helpers');
const ColaMangaScraper = require('../scrapers/ColaMangaScraper');
const MangaPoster = require('../posters/MangaPoster');
const config = require('../config/config');
/**
* Main class for downloading manga
*/
class MangaDownloader {
  constructor() {
    this.outputDir = config.baseDir;
    this.maxConcurrentManga = config.getDownloadSetting('maxConcurrentDownloads');
    this.mangaPoster = new MangaPoster();
  }

  /**
   * Initialize the downloader: load the manga URL list and start the scraper.
   * @returns {Promise<void>}
   */
  async init_colamanga_scraper() {
    this.mangaUrls = await helpers.readJsonFile(config.mangaUrlsPath) || {};
    this.scraper = new ColaMangaScraper();
    await this.scraper.init();
  }

  /**
   * Download a single manga: scrape its info, save the cover, register the
   * manga with the API when it is not known yet, then download its chapters
   * one at a time.
   * @param {string} mangaName - Manga name (also used as the output folder name)
   * @param {string} mangaUrl - Manga URL
   * @returns {Promise<void>}
   * @throws {Error} Wrapping the underlying failure (available as `error.cause`)
   */
  async downloadManga(mangaName, mangaUrl) {
    console.log(`Getting Manga: ${mangaName} info`);

    try {
      const mangaDetails = await this.scraper.getMangaInfo(mangaUrl);

      const mangaDir = path.join(this.outputDir, mangaName);
      await helpers.ensureDirectory(mangaDir);

      // Save cover image (only when one was captured and none is stored yet)
      const coverPath = path.join(mangaDir, 'cover.jpg');
      if (mangaDetails.coverPic && (await helpers.fileExists(coverPath)) == false) {
        await fs.writeFile(coverPath, mangaDetails.coverPic);
        mangaDetails.coverPic = null;
      }

      // coverPic is destructured only to keep it out of the API payload.
      const { chapters, coverPic, genres, ...mangaInfo } = mangaDetails;

      const response = await this.mangaPoster.getMangaId(mangaDetails.mangaName);
      // Loose comparison kept on purpose: the API response schema is not visible here.
      if (response.success == true) {
        mangaDetails.mangaId = response.mangaId;
      } else {
        mangaDetails.mangaId = helpers.generateId();
        // Await both inserts: the manga must be registered before any chapter
        // is posted, and a failed insert must fail this download instead of
        // becoming an unhandled rejection.
        await this.mangaPoster.insertMangaInfo({ mangaId: mangaDetails.mangaId, ...mangaInfo });
        await this.mangaPoster.insertMangaGenres(mangaDetails.mangaId, genres);
      }

      // Download chapters in sequence to manage memory
      for (const chapter of chapters) {
        const chapterDir = path.join(mangaDir, chapter.chapterName);
        if (await helpers.isDirectoryNotEmpty(chapterDir)) {
          console.log(`Skipping Manga: ${mangaDetails.mangaName}, Chapter: ${chapter.order} - ${chapter.chapterName} as it already exists`);
          continue;
        }

        console.log(`Downloading Manga: ${mangaDetails.mangaName}, Chapter: ${chapter.order} - ${chapter.chapterName}`);
        await helpers.ensureDirectory(chapterDir);
        await this.scraper.downloadChapterPics(chapter, chapterDir);

        await this.mangaPoster.insertMangaChapter({
          mangaId: mangaDetails.mangaId,
          chapterName: chapter.chapterName,
          chapterOrder: chapter.order
        });

        await this.scraper.cleanup();

        // Add smart delay between chapter downloads
        const delay = helpers.getSmartDelay();
        console.log(`Waiting ${helpers.formatDelay(delay)} before next chapter...`);
        await new Promise(resolve => setTimeout(resolve, delay));
      }

      console.log(`Completed downloading: ${mangaName}`);
    } catch (error) {
      // Keep the original error (and its stack) reachable through `cause`.
      throw new Error(`Failed to download manga ${mangaName}: ${error.message}`, { cause: error });
    }
  }

  /**
   * Download every configured manga sequentially and close the scraper afterwards.
   * @returns {Promise<Object>} `{ successful: string[], failed: Array<{name, error}> }`
   */
  async run() {
    const results = {
      successful: [],
      failed: []
    };

    try {
      // Process manga sequentially to manage memory
      for (const [mangaName, mangaUrl] of Object.entries(this.mangaUrls)) {
        try {
          await this.downloadManga(mangaName, mangaUrl);
          results.successful.push(mangaName);

          // Clean up after each manga
          await this.scraper.cleanup();

          // Force garbage collection (only available when Node exposes global.gc)
          if (global.gc) {
            global.gc();
          }
        } catch (error) {
          results.failed.push({ name: mangaName, error: error.message });
        }
      }
    } finally {
      await this.scraper.close();
    }

    return results;
  }
}
module.exports = MangaDownloader;

24
src/index.js Normal file
View File

@ -0,0 +1,24 @@
require('dotenv').config();
const MangaDownloader = require('./downloaders/MangaDownloader');
/**
 * Main entry point for the manga scraper.
 * Runs the downloader, prints a summary, and exits with code 1 when any
 * download failed or a fatal error occurred.
 */
async function main() {
  try {
    const downloader = new MangaDownloader();
    await downloader.init_colamanga_scraper();
    const results = await downloader.run();

    console.log(`Download finished: ${results.successful.length} succeeded, ${results.failed.length} failed`);

    // Exit with error code if any downloads failed, after saying which ones
    // and why (previously the process exited with code 1 and no output).
    if (results.failed.length > 0) {
      for (const { name, error } of results.failed) {
        console.error(`Failed: ${name} - ${error}`);
      }
      process.exit(1);
    }
  } catch (error) {
    console.error('Fatal error:', error);
    process.exit(1);
  }
}

// Run the main function
main();

View File

@ -0,0 +1,72 @@
const config = require('../config/config');
/**
 * Thin client for the manga backend API. Endpoint URLs come from
 * `config.getApiUrl(name)`; unless noted, methods resolve with the parsed
 * JSON body of the response.
 */
class MangaPoster {
  /**
   * POST a JSON payload to a configured endpoint.
   * @param {string} endpoint - Endpoint name understood by config.getApiUrl
   * @param {Object} payload - Value to serialize as the request body
   * @returns {Promise<Response>} The raw fetch response (body not consumed)
   */
  postJson(endpoint, payload) {
    return fetch(config.getApiUrl(endpoint), {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify(payload)
    });
  }

  /**
   * Look up a manga by name.
   * @param {string} mangaName - Manga name
   * @returns {Promise<Object>} Parsed JSON response
   */
  async getMangaId(mangaName) {
    // Encode the name: characters such as "/", "?", "#" or "%" would otherwise
    // change the request path or be cut off as query/fragment.
    const url = `${config.getApiUrl('getMangaId')}/${encodeURIComponent(mangaName)}`;
    const response = await fetch(url, { method: 'GET' });
    return response.json();
  }

  /**
   * Look up a genre by name.
   * @param {string} genreName - Genre name
   * @returns {Promise<Object>} Parsed JSON response
   */
  async getGenreId(genreName) {
    const url = `${config.getApiUrl('getGenreId')}/${encodeURIComponent(genreName)}`;
    const response = await fetch(url, { method: 'GET' });
    return response.json();
  }

  /**
   * Insert a manga record.
   * @param {Object} mangaInfo - Manga fields to store
   * @returns {Promise<Object>} Parsed JSON response
   */
  async insertMangaInfo(mangaInfo) {
    const response = await this.postJson('insertMangaInfo', mangaInfo);
    return response.json();
  }

  /**
   * Link a manga to each of its genres, creating genres the lookup reports as missing.
   * Genres are processed one after another.
   * @param {string} mangaId - Manga identifier
   * @param {string[]} genres - Genre names
   * @returns {Promise<void>}
   */
  async insertMangaGenres(mangaId, genres) {
    for (const genre of genres ?? []) {
      let response = await this.getGenreId(genre);
      if (response.success == false) {
        response = await this.insertGenre({ genreName: genre });
      }
      const genreId = response.genreId;
      // The link response body is intentionally not parsed, as before.
      await this.postJson('insertMangaGenres', { mangaId, genreId });
    }
  }

  /**
   * Insert a genre record.
   * @param {Object} genre - Genre fields to store
   * @returns {Promise<Object>} Parsed JSON response
   */
  async insertGenre(genre) {
    const response = await this.postJson('insertGenre', genre);
    return response.json();
  }

  /**
   * Insert a chapter record.
   * @param {Object} mangaChapter - Chapter fields to store
   * @returns {Promise<Object>} Parsed JSON response
   */
  async insertMangaChapter(mangaChapter) {
    const response = await this.postJson('insertMangaChapter', mangaChapter);
    return response.json();
  }
}
module.exports = MangaPoster;

View File

@ -0,0 +1,97 @@
const puppeteer = require('puppeteer');
const config = require('../config/config');
/**
* Base class for manga scrapers
* Provides common functionality for browser management and page handling
*/
class BaseScraper {
  constructor() {
    this.browser = null;
    this.page = null;
    // Captured response bodies, keyed by 'cover' or by response URL.
    this.page_response = {};
  }

  /**
   * Launch the browser, take its first page and start capturing responses.
   * Responses accepted by shouldInterceptResponse() are stored in
   * `page_response` under 'cover' (when isCoverPic() says so) or their URL.
   * @returns {Promise<void>}
   */
  async init() {
    this.browser = await puppeteer.launch({
      headless: config.getScraperSetting('headless'),
      args: ['--no-sandbox', '--disable-setuid-sandbox']
    });

    this.page = (await this.browser.pages())[0];

    // Set up response interception
    this.page.on('response', async (response) => {
      try {
        if (!this.shouldInterceptResponse(response)) {
          return;
        }
        const key = this.isCoverPic(response) ? 'cover' : response.url();
        this.page_response[key] = await response.buffer();
      } catch (error) {
        // A rejection inside an event listener is never awaited by anyone and
        // would surface as an unhandled rejection, so report it here instead.
        console.warn(`Failed to capture response ${response.url()}: ${error.message}`);
      }
    });

    console.log("Browser initialized");
  }

  /**
   * Check if a response should be intercepted
   * @param {Response} response - Puppeteer response object
   * @returns {boolean} Whether to intercept the response
   */
  shouldInterceptResponse(response) {
    return false; // Override in child classes
  }

  /**
   * Check if an intercepted response is the cover picture.
   * The response listener calls this hook, so the base class must provide it;
   * without this default a subclass that only overrides
   * shouldInterceptResponse() failed with "isCoverPic is not a function".
   * @param {Response} response - Puppeteer response object
   * @returns {boolean} Whether the response is the cover picture
   */
  isCoverPic(response) {
    return false; // Override in child classes
  }

  /**
   * Navigate to a URL
   * @param {string} url - URL to navigate to
   * @returns {Promise<void>}
   */
  async navigateTo(url) {
    await this.page.goto(url, { waitUntil: 'domcontentloaded' });
  }

  /**
   * Close the page and the browser and reset captured responses.
   * The browser is closed even when closing the page throws; the first error
   * is still propagated to the caller.
   * @returns {Promise<void>}
   */
  async close() {
    try {
      if (this.page) {
        await this.page.close();
      }
    } finally {
      this.page = null;
      try {
        if (this.browser) {
          await this.browser.close();
        }
      } finally {
        this.browser = null;
        this.page_response = {};
      }
    }
  }

  /**
   * Scroll the page from the top to the bottom in 100px steps, one step
   * every 100 ms.
   * @returns {Promise<void>}
   */
  async scrollPage() {
    await this.page.evaluate(async () => {
      window.scrollTo(0, 0);
      await new Promise((resolve) => {
        const distance = 100;
        const delay = 100;
        const scrollInterval = setInterval(() => {
          window.scrollBy(0, distance);
          if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight) {
            clearInterval(scrollInterval);
            resolve();
          }
        }, delay);
      });
    });
  }
}
module.exports = BaseScraper;

View File

@ -0,0 +1,258 @@
const BaseScraper = require('./BaseScraper');
const sharp = require('sharp');
const fs = require('fs').promises;
const path = require('path');
const config = require('../config/config');
/**
* Scraper implementation for colamanga.com
*/
class ColaMangaScraper extends BaseScraper {
    constructor() {
        super();
        // Captured response bodies, keyed by response URL plus the special key
        // 'cover'. The BaseScraper response handler writes entries with bracket
        // assignment and BaseScraper.close() resets the field to `{}`, so this
        // must be a plain object: on a Map those entries would be ad-hoc
        // properties that Map#clear() never removes.
        this.page_response = {};
        this.maxConcurrentDownloads = config.getDownloadSetting('maxConcurrentDownloads');
        this.downloadQueue = [];
    }

    /**
     * Check if a response should be intercepted
     * @param {Response} response - Puppeteer response object
     * @returns {boolean} True for responses from https://res.colamanga.com and
     *   for blob: URLs created by https://www.colamanga.com
     */
    shouldInterceptResponse(response) {
        const url = response.url();
        return url.includes('https://res.colamanga.com') ||
            /blob:https:\/\/www\.colamanga\.com\//.test(url);
    }

    /**
     * Check if a response should be stored as the manga cover
     * @param {Response} response - Puppeteer response object
     * @returns {boolean} True when the URL contains https://res.colamanga.com
     */
    isCoverPic(response) {
        // NOTE(review): every response from this host counts as the cover, so
        // the stored cover is whichever matching response arrived last.
        return response.url().includes('https://res.colamanga.com');
    }

    /**
     * Extract basic manga information from the page
     * @returns {Promise<Object>} Manga info with the name filled in and all
     *   other fields set to empty defaults
     */
    async extractBasicInfo() {
        const mangaName = await this.page.$eval('.fed-deta-content h1', el => el.textContent.trim());
        return {
            mangaName,
            mangaAuthor: '',
            mangaStatus: '',
            mangaNickname: [],
            genres: [],
            chapters: []
        };
    }

    /**
     * Extract detailed manga information
     * @param {Object} mangaInfo - Basic manga info to extend (mutated in place)
     * @returns {Promise<void>}
     */
    async extractDetailedInfo(mangaInfo) {
        const elements = await this.page.$$('.fed-deta-content li');
        // Label text of each detail row -> field of mangaInfo it fills.
        const infoMap = {
            '状态': 'mangaStatus',
            '作者': 'mangaAuthor',
            '别名': 'mangaNickname',
            '类别': 'genres'
        };
        for (const el of elements) {
            const span = await this.page.evaluate(e => e.querySelector('span')?.textContent.trim(), el);
            const field = infoMap[span];
            if (!field) continue;
            if (field === 'mangaNickname' || field === 'genres') {
                // List-valued rows: one entry per link.
                mangaInfo[field] = await this.page.evaluate(e => {
                    return Array.from(e.querySelectorAll('a')).map(a => a.textContent.trim());
                }, el);
            } else {
                // Single-valued rows: keep the empty-string default when the
                // row has no link instead of overwriting it with undefined.
                const value = await this.page.evaluate(e => e.querySelector('a')?.textContent.trim(), el);
                mangaInfo[field] = value ?? '';
            }
        }
    }

    /**
     * Extract chapter information from the page
     * @param {string} mangaUrl - Base manga URL used to resolve chapter links
     * @returns {Promise<Array>} Chapter objects ({chapterName, url, order});
     *   the list is reversed relative to page order and `order` starts at 1
     */
    async extractChapterInfo(mangaUrl) {
        const chapterElements = await this.page.$$('.all_data_list li');
        return Promise.all(chapterElements.reverse().map(async (el, index) => {
            // Read both values in a single round trip to the page.
            const link = await el.$eval('a', a => ({
                name: a.textContent.trim(),
                href: a.getAttribute('href')
            }));
            return {
                chapterName: link.name,
                url: new URL(link.href, mangaUrl).href,
                order: index + 1
            };
        }));
    }

    /**
     * Wait for cover picture to be loaded
     * @returns {Promise<Buffer>} Cover image buffer
     * @throws {Error} When no cover was captured within the scraper timeout
     */
    async waitForCoverPicture() {
        const maxWaitTime = config.getScraperSetting('timeout');
        const startTime = Date.now();
        while (Date.now() - startTime < maxWaitTime) {
            if (this.page_response['cover']) {
                return this.page_response['cover'];
            }
            await new Promise(resolve => setTimeout(resolve, config.getDownloadSetting('downloadDelay')));
        }
        throw new Error('Timeout waiting for cover picture');
    }

    /**
     * Get complete manga information
     * @param {string} mangaUrl - Manga URL
     * @returns {Promise<Object>} Complete manga information, including
     *   `chapters` and the `coverPic` buffer
     * @throws {Error} Wrapping any failure; the original error is the `cause`
     */
    async getMangaInfo(mangaUrl) {
        try {
            // Forget a cover captured on a previously visited page so that
            // waitForCoverPicture() cannot return a stale image.
            delete this.page_response['cover'];
            await this.navigateTo(mangaUrl);
            await this.page.waitForSelector('.fed-deta-info', { visible: true });
            const mangaInfo = await this.extractBasicInfo();
            await this.extractDetailedInfo(mangaInfo);
            mangaInfo.chapters = await this.extractChapterInfo(mangaUrl);
            mangaInfo.coverPic = await this.waitForCoverPicture();
            return mangaInfo;
        } catch (error) {
            throw new Error(`Failed to get manga info: ${error.message}`, { cause: error });
        }
    }

    /**
     * Download chapter images
     * @param {Object} chapter - Chapter information (as built by extractChapterInfo)
     * @param {string} chapterDir - Chapter directory
     * @returns {Promise<void>}
     * @throws {Error} Wrapping any failure; the original error is the `cause`
     */
    async downloadChapterPics(chapter, chapterDir = ".") {
        try {
            await this.navigateTo(chapter.url);
            await this.page.waitForSelector('.mh_mangalist', { visible: true });
            await this.scrollPage();
            await this.saveImages(chapterDir);
        } catch (error) {
            // Chapters built by extractChapterInfo() carry `chapterName`;
            // `name` is kept as a fallback for callers that pass their own shape.
            const label = chapter.chapterName ?? chapter.name;
            throw new Error(`Failed to download chapter ${label}: ${error.message}`, { cause: error });
        } finally {
            // Release the captured image buffers whether or not the chapter succeeded.
            this.page_response = {};
        }
    }

    /**
     * Wait until the response for an image has been captured
     * @param {string} imgSrc - Image source URL used as the capture key
     * @returns {Promise<Buffer>} Captured image buffer
     * @throws {Error} When nothing was captured within the scraper timeout
     */
    async waitForImageResponse(imgSrc) {
        const maxWaitTime = config.getScraperSetting('timeout');
        const startTime = Date.now();
        while (!this.page_response[imgSrc]) {
            if (Date.now() - startTime >= maxWaitTime) {
                throw new Error(`Timeout waiting for image ${imgSrc}`);
            }
            await new Promise(resolve => setTimeout(resolve, 100));
        }
        return this.page_response[imgSrc];
    }

    /**
     * Scroll to every chapter picture (last to first) and save its captured image
     * @param {string} chapterDir - Chapter directory
     * @returns {Promise<number>} The `p` attribute of the first picture that
     *   was processed, parsed as a number (0 when none was processed)
     */
    async saveImages(chapterDir) {
        const pictureElements = (await this.page.$$('.mh_comicpic')).reverse();
        let totalImages = 0;
        for (const element of pictureElements) {
            const pValue = await this.page.evaluate(el => el.getAttribute('p'), element);
            // Bring the picture to the middle of the viewport and give the
            // page a moment to react.
            await this.page.evaluate(el => {
                const rect = el.getBoundingClientRect();
                window.scrollTo({
                    top: window.scrollY + rect.top - (window.innerHeight / 2),
                    behavior: 'smooth'
                });
            }, element);
            await new Promise(resolve => setTimeout(resolve, 1000));
            // NOTE(review): a picture is skipped only when it has a
            // `.mh_loading` child that is not rendered; confirm this matches
            // the site's markup for real pages versus placeholders.
            const loadingDiv = await element.$('.mh_loading');
            let isVisible = true;
            if (loadingDiv) {
                isVisible = await this.page.evaluate(el => {
                    return el.offsetParent !== null;
                }, loadingDiv);
            }
            if (!isVisible) continue;
            if (totalImages === 0) {
                totalImages = Number.parseInt(pValue, 10) || 0;
            }
            const imgDiv = await element.$('img');
            if (!imgDiv) {
                throw new Error(`No image element found for page ${pValue}`);
            }
            const imgSrc = await this.page.evaluate(el => el.src, imgDiv);
            const buffer = await this.waitForImageResponse(imgSrc);
            await this.saveImage(imgSrc, buffer, chapterDir);
        }
        return totalImages;
    }

    /**
     * Get image order from page
     * @param {string} src - Image source URL
     * @returns {Promise<string|undefined>} The `p` attribute of the picture
     *   containing the image, zero-padded to the digit count of the number of
     *   pictures on the page; undefined when no picture matches
     */
    async getImgOrder(src) {
        const orders = await this.page.$$eval('.mh_comicpic', (elements, src) => {
            const width = String(elements.length).length;
            return elements
                // Compare values instead of interpolating the URL into a CSS
                // selector, which breaks on URLs containing quotes or backslashes.
                .filter(el => Array.from(el.querySelectorAll('img'))
                    .some(img => img.getAttribute('src') === src || img.src === src))
                .map(el => el.getAttribute('p'))
                .filter(p => p !== null)
                .map(p => p.padStart(width, '0'));
        }, src);
        return orders[0];
    }

    /**
     * Process and save a single image
     * @param {string} url - Image URL
     * @param {Buffer} buffer - Image buffer
     * @param {string} chapterDir - Chapter directory
     * @returns {Promise<boolean>} True once the file has been written
     * @throws {Error} When the page order is unknown or the conversion fails
     */
    async saveImage(url, buffer, chapterDir) {
        try {
            const order = await this.getImgOrder(url);
            if (order === undefined) {
                // Refuse to write a file literally named "undefined.webp".
                throw new Error('could not determine page order');
            }
            const filepath = path.join(chapterDir, `${order}.webp`);
            // Re-encode as WebP with the configured quality.
            await sharp(buffer)
                .webp({
                    quality: config.getDownloadSetting('imageQuality')
                })
                .toFile(filepath);
            return true;
        } catch (error) {
            console.error(`Failed to process image ${url}: ${error.message}`);
            throw error;
        }
    }

    /**
     * Clean up resources
     * @returns {Promise<void>}
     */
    async cleanup() {
        // Reassign instead of calling clear(): the field is a plain object.
        this.page_response = {};
        if (this.page) {
            await this.page.evaluate(() => {
                // window.gc is only present when the browser exposes it.
                if (window.gc) {
                    window.gc();
                }
            });
        }
    }
}
module.exports = ColaMangaScraper;

157
src/utils/helpers.js Normal file
View File

@ -0,0 +1,157 @@
const fs = require('fs');
const path = require('path');
const { v4: uuidv4 } = require('uuid');
/**
* Utility functions for the manga scraper
*/
class Helpers {
    /**
     * Sanitize a string to be used as a filename
     *
     * Leading and trailing whitespace is removed first; then characters that
     * are invalid in file names and remaining runs of whitespace are replaced
     * with underscores.
     * @param {string} str - String to sanitize
     * @returns {string} Sanitized string
     */
    static sanitizeFileName(str) {
        const invalidChars = /[<>:"/\\|?*\x00-\x1F]/g;
        // Trim before substituting: once whitespace has become "_" there is
        // nothing left for trim() to remove.
        return str.trim()
            .replace(invalidChars, '_')
            .replace(/\s+/g, '_');
    }

    /**
     * Generate a smart delay based on the local time of day
     * @returns {number} Random delay in milliseconds within the range that
     *   applies to the current hour
     */
    static getSmartDelay() {
        const MINUTE = 60 * 1000;
        const hour = new Date().getHours();
        let range;
        if (hour < 6) {
            // Midnight to 6 AM: 20-30 minutes
            range = [20 * MINUTE, 30 * MINUTE];
        } else if (hour >= 18) {
            // 6 PM to midnight: 5-8 minutes
            range = [5 * MINUTE, 8 * MINUTE];
        } else {
            // Day time: 10-12 minutes
            range = [10 * MINUTE, 12 * MINUTE];
        }
        const [minDelay, maxDelay] = range;
        return minDelay + Math.random() * (maxDelay - minDelay);
    }

    /**
     * Format delay time for logging
     * @param {number} delay - Delay in milliseconds
     * @returns {string} Formatted delay string (whole minutes and seconds)
     */
    static formatDelay(delay) {
        const minutes = Math.floor(delay / 60000);
        const seconds = Math.floor((delay % 60000) / 1000);
        return `${minutes} minutes and ${seconds} seconds`;
    }

    /**
     * Generate a unique ID
     * @returns {string} UUID v4
     */
    static generateId() {
        return uuidv4();
    }

    /**
     * Ensure directory exists (missing parent directories are created too)
     * @param {string} dirPath - Directory path
     * @returns {Promise<void>}
     * @throws {Error} When creation fails; the original error is the `cause`
     */
    static async ensureDirectory(dirPath) {
        try {
            await fs.promises.mkdir(dirPath, { recursive: true });
        } catch (error) {
            throw new Error(`Failed to create directory ${dirPath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Read JSON file
     * @param {string} filePath - Path to JSON file
     * @returns {Promise<Object>} Parsed JSON data
     * @throws {Error} When reading or parsing fails; the original error
     *   (including its `code`, e.g. ENOENT) is the `cause`
     */
    static async readJsonFile(filePath) {
        try {
            const data = await fs.promises.readFile(filePath, 'utf8');
            return JSON.parse(data);
        } catch (error) {
            throw new Error(`Failed to read JSON file ${filePath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Write JSON file (pretty-printed with two-space indentation)
     * @param {string} filePath - Path to JSON file
     * @param {Object} data - Data to write
     * @returns {Promise<void>}
     * @throws {Error} When serializing or writing fails; the original error
     *   is the `cause`
     */
    static async writeJsonFile(filePath, data) {
        try {
            await fs.promises.writeFile(filePath, JSON.stringify(data, null, 2));
        } catch (error) {
            throw new Error(`Failed to write JSON file ${filePath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Check if file exists
     * @param {string} filePath - Path to file
     * @returns {Promise<boolean>} False when the path cannot be accessed
     */
    static async fileExists(filePath) {
        try {
            await fs.promises.access(filePath);
            return true;
        } catch {
            return false;
        }
    }

    /**
     * Get file extension from URL
     * @param {string} url - URL to get extension from
     * @returns {string} Lower-cased extension including the dot; '.webp' when
     *   the URL has no extension or cannot be parsed
     */
    static getFileExtension(url) {
        try {
            const { pathname } = new URL(url);
            const extension = path.extname(pathname).toLowerCase();
            return extension || '.webp'; // Default to .webp if no extension found
        } catch {
            return '.webp';
        }
    }

    /**
     * Check if directory exists and is not empty
     * @param {string} dirPath - Directory path to check
     * @returns {Promise<boolean>} True if directory exists and is not empty, false otherwise
     */
    static async isDirectoryNotEmpty(dirPath) {
        try {
            const stats = await fs.promises.stat(dirPath);
            if (!stats.isDirectory()) {
                return false;
            }
            const files = await fs.promises.readdir(dirPath);
            return files.length > 0;
        } catch {
            // Best effort: any failure (missing path, no permission) counts as
            // "not a non-empty directory".
            return false;
        }
    }
}
module.exports = Helpers;