From 66b78bca1d91362c9832f1396890b1953998ba22 Mon Sep 17 00:00:00 2001 From: yiekheng Date: Wed, 14 May 2025 16:20:56 +0800 Subject: [PATCH] Updated scraper for new server --- .gitignore | 3 +- colamanga_scraper.js | 131 --------------- index.js | 8 - package-lock.json | 231 ++++++++++++++++++-------- package.json | 22 ++- scraper.js | 81 --------- src/config/config.js | 81 +++++++++ src/downloaders/MangaDownloader.js | 134 +++++++++++++++ src/index.js | 24 +++ src/posters/MangaPoster.js | 72 ++++++++ src/scrapers/BaseScraper.js | 97 +++++++++++ src/scrapers/ColaMangaScraper.js | 258 +++++++++++++++++++++++++++++ src/utils/helpers.js | 157 ++++++++++++++++++ 13 files changed, 1004 insertions(+), 295 deletions(-) delete mode 100644 colamanga_scraper.js delete mode 100644 index.js delete mode 100644 scraper.js create mode 100644 src/config/config.js create mode 100644 src/downloaders/MangaDownloader.js create mode 100644 src/index.js create mode 100644 src/posters/MangaPoster.js create mode 100644 src/scrapers/BaseScraper.js create mode 100644 src/scrapers/ColaMangaScraper.js create mode 100644 src/utils/helpers.js diff --git a/.gitignore b/.gitignore index 671a702..a3186ba 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ /node_modules -/test \ No newline at end of file +/test +.env \ No newline at end of file diff --git a/colamanga_scraper.js b/colamanga_scraper.js deleted file mode 100644 index 654c93b..0000000 --- a/colamanga_scraper.js +++ /dev/null @@ -1,131 +0,0 @@ -const { BaseScraper } = require('./scraper.js'); -const sharp = require('sharp'); -const fs = require('fs'); -const path = require('path'); - -class ColaMangaScraper extends BaseScraper { - constructor() { - super(); - } - - async saveBufferAsWebp(buffer, filename, dir = '.') { - const dirPath = path.resolve(dir); - const filePath = path.join(dirPath, filename); - - try { - await fs.promises.mkdir(dirPath, { recursive: true }); - await sharp(buffer).webp({ quality: 80 
}).toFormat('webp').toFile(filePath); - } catch (error) { - console.error(`Failed to save ${filename}:`, error); - } - } - - async getMangaInfo(mangaUrl) { - const page = await this.loadPage(mangaUrl); - await page.waitForSelector('.fed-deta-info', { visible: true }); - const mangaName = await page.$eval('.fed-deta-content h1', el => el.textContent); - const elements = await page.$$('.fed-deta-content li'); - const mangaInfo = { - name: mangaName, - author: '', - nickNames: [], - genres: [], - status: '', - chapters: [] - }; - for (const el of elements) { - const span = await el.$eval('span', el => el.textContent.trim()); - if (span === '状态') { - mangaInfo.status = await el.$eval('a', el => el.textContent); - } else if (span === '作者') { - mangaInfo.author = await el.$eval('a', el => el.textContent); - } else if (span === '别名') { - mangaInfo.nickNames = await el.$$eval('a', els => els.map(el => el.textContent)); - } else if (span === '类别') { - mangaInfo.genres = await el.$$eval('a', els => els.map(el => el.textContent)); - } - } - const chapterElements = await page.$$('.all_data_list li'); - mangaInfo.chapters = await Promise.all(chapterElements.map(async el => { - const chapterName = await el.$eval('a', el => el.textContent); - const chapterUrl = await el.$eval('a', el => el.getAttribute('href')); - return { - name: chapterName, - url: chapterUrl - }; - })); - while (!this.pages_response[this.pages.indexOf(page)]) { - await new Promise(resolve => setTimeout(resolve, 100)); - } - mangaInfo.coverPic = this.pages_response[this.pages.indexOf(page)]; - await this.closePage(page); - return mangaInfo; - } - - async downloadChapterPics(chapter, chapterDir = ".") { - const directoryPath = path.resolve(chapterDir); - if (fs.existsSync(directoryPath)) { - console.log(`Skipping ${chapter.name} as it already exists`); - return; - } - fs.mkdirSync(directoryPath, { recursive: true }); - - const page = await this.loadPage(chapter.url); - const pageIndex = this.pages.indexOf(page); 
- await page.waitForSelector('.mh_mangalist', { visible: true }); - - for (let attempt = 0; attempt < 10; attempt++) { - console.log(`Downloading ${chapter.name}, attempt ${attempt + 1}`); - await this.scrollPage(page); - const loadingElements = await page.$$eval('.mh_loading:not([style*="display: none"])', elements => elements.length); - await new Promise(resolve => setTimeout(resolve, 1000)); - if (loadingElements === 0 && Object.keys(this.pages_response[pageIndex]).length !== 0) { - break; - } - } - - const responses = this.pages_response[pageIndex]; - for (const [url, response] of Object.entries(responses)) { - const fileName = (await this.getImgOrder(page, url)) + '.webp'; - const buffer = await (new Blob([response], { type: 'image/webp' })).arrayBuffer(); - await this.saveBufferToWebp(buffer, fileName, chapterDir); - } - - await this.closePage(page); - } - - async downloadChapter(chapters, dir = ".") { - const dirPath = path.resolve(dir); - if (!fs.existsSync(dirPath)) { - fs.mkdirSync(dirPath, { recursive: true }); - } - // const chapter = chapters[Math.floor(Math.random() * chapters.length)]; - for (const chapter of chapters) { - await this.downloadChapterPics(chapter, path.join(dir, chapter.name)); - } - } - - async getImgOrder(page, src) { - const loadingAttributes = await page.$$eval('.mh_comicpic', (elements, src) => { - return elements - .filter(el => el.querySelector(`img[src="${src}"]`)) - .map(el => el.getAttribute('p')) - .map(str => str.padStart(elements.length.toString().length, '0')); - }, src); - return loadingAttributes; - } -} - -(async () => { - const scraper = new ColaMangaScraper(); - await scraper.init(); - const mangaUrl = 'https://www.colamanga.com/manga-od825111/'; - const mangaInfo = await scraper.getMangaInfo(mangaUrl); - await scraper.saveBufferToWebp(mangaInfo.coverPic, 'cover.webp', 'test'); - await scraper.downloadChapter(mangaInfo.chapters, 'test'); - await scraper.closeAllPages(); - console.log(mangaInfo); -})(); - - - diff 
--git a/index.js b/index.js deleted file mode 100644 index 2501314..0000000 --- a/index.js +++ /dev/null @@ -1,8 +0,0 @@ -const axios = require('axios'); - -const postData = { - mangaId: 'yourMangaId', - mangaName: 'yourMangaName' -}; - -axios.post('http://localhost:4000/insert', postData) \ No newline at end of file diff --git a/package-lock.json b/package-lock.json index e09cda7..bacc705 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,18 +1,19 @@ { - "name": "sunnymh-scrap", + "name": "sunnymh-scraper", "version": "1.0.0", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "sunnymh-scrap", + "name": "sunnymh-scraper", "version": "1.0.0", "license": "ISC", "dependencies": { - "axios": "^1.8.4", - "fs": "^0.0.1-security", - "puppeteer": "^24.4.0", - "sharp": "^0.33.5" + "axios": "^1.6.7", + "dotenv": "^16.5.0", + "puppeteer": "^22.0.0", + "sharp": "^0.33.2", + "uuid": "^9.0.1" } }, "node_modules/@babel/code-frame": { @@ -410,17 +411,18 @@ } }, "node_modules/@puppeteer/browsers": { - "version": "2.8.0", - "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.8.0.tgz", - "integrity": "sha512-yTwt2KWRmCQAfhvbCRjebaSX8pV1//I0Y3g+A7f/eS7gf0l4eRJoUCvcYdVtboeU4CTOZQuqYbZNS8aBYb8ROQ==", + "version": "2.3.0", + "resolved": "https://registry.npmjs.org/@puppeteer/browsers/-/browsers-2.3.0.tgz", + "integrity": "sha512-ioXoq9gPxkss4MYhD+SFaU9p1IHFUX0ILAWFPyjGaBdjLsYAlZw6j1iLA0N/m12uVHLFDfSYNF7EQccjinIMDA==", "license": "Apache-2.0", "dependencies": { - "debug": "^4.4.0", + "debug": "^4.3.5", "extract-zip": "^2.0.1", "progress": "^2.0.3", - "proxy-agent": "^6.5.0", - "semver": "^7.7.1", - "tar-fs": "^3.0.8", + "proxy-agent": "^6.4.0", + "semver": "^7.6.3", + "tar-fs": "^3.0.6", + "unbzip2-stream": "^1.4.3", "yargs": "^17.7.2" }, "bin": { @@ -437,13 +439,13 @@ "license": "MIT" }, "node_modules/@types/node": { - "version": "22.13.13", - "resolved": "https://registry.npmjs.org/@types/node/-/node-22.13.13.tgz", - "integrity": 
"sha512-ClsL5nMwKaBRwPcCvH8E7+nU4GxHVx1axNvMZTFHMEfNI7oahimt26P5zjVCRrjiIWj6YFXfE1v3dEp94wLcGQ==", + "version": "22.15.15", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.15.15.tgz", + "integrity": "sha512-R5muMcZob3/Jjchn5LcO8jdKwSCbzqmPB6ruBxMcf9kbxtniZHP327s6C37iOfuw8mbKK3cAQa7sEl7afLrQ8A==", "license": "MIT", "optional": true, "dependencies": { - "undici-types": "~6.20.0" + "undici-types": "~6.21.0" } }, "node_modules/@types/yauzl": { @@ -538,9 +540,9 @@ "optional": true }, "node_modules/bare-fs": { - "version": "4.0.2", - "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.0.2.tgz", - "integrity": "sha512-S5mmkMesiduMqnz51Bfh0Et9EX0aTCJxhsI4bvzFFLs8Z1AV8RDHadfY5CyLwdoLHgXbNBEN1gQcbEtGwuvixw==", + "version": "4.1.4", + "resolved": "https://registry.npmjs.org/bare-fs/-/bare-fs-4.1.4.tgz", + "integrity": "sha512-r8+26Voz8dGX3AYpJdFb1ZPaUSM8XOLCZvy+YGpRTmwPHIxA7Z3Jov/oMPtV7hfRQbOnH8qGlLTzQAbgtdNN0Q==", "license": "Apache-2.0", "optional": true, "dependencies": { @@ -602,6 +604,26 @@ } } }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/basic-ftp": { "version": "5.0.5", "resolved": "https://registry.npmjs.org/basic-ftp/-/basic-ftp-5.0.5.tgz", @@ -611,6 +633,30 @@ "node": ">=10.0.0" } }, + "node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "funding": [ + { + "type": "github", + "url": 
"https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, "node_modules/buffer-crc32": { "version": "0.2.13", "resolved": "https://registry.npmjs.org/buffer-crc32/-/buffer-crc32-0.2.13.tgz", @@ -643,13 +689,14 @@ } }, "node_modules/chromium-bidi": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-2.1.2.tgz", - "integrity": "sha512-vtRWBK2uImo5/W2oG6/cDkkHSm+2t6VHgnj+Rcwhb0pP74OoUb4GipyRX/T/y39gYQPhioP0DPShn+A7P6CHNw==", + "version": "0.6.3", + "resolved": "https://registry.npmjs.org/chromium-bidi/-/chromium-bidi-0.6.3.tgz", + "integrity": "sha512-qXlsCmpCZJAnoTYI83Iu6EdYQpMYdVkCfq08KDh2pmlVqK5t5IA9mGs4/LwCwp4fqisSOMXZxP3HIh8w8aRn0A==", "license": "Apache-2.0", "dependencies": { - "mitt": "^3.0.1", - "zod": "^3.24.1" + "mitt": "3.0.1", + "urlpattern-polyfill": "10.0.0", + "zod": "3.23.8" }, "peerDependencies": { "devtools-protocol": "*" @@ -807,11 +854,23 @@ } }, "node_modules/devtools-protocol": { - "version": "0.0.1413902", - "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1413902.tgz", - "integrity": "sha512-yRtvFD8Oyk7C9Os3GmnFZLu53yAfsnyw1s+mLmHHUK0GQEc9zthHWvS1r67Zqzm5t7v56PILHIVZ7kmFMaL2yQ==", + "version": "0.0.1312386", + "resolved": "https://registry.npmjs.org/devtools-protocol/-/devtools-protocol-0.0.1312386.tgz", + "integrity": "sha512-DPnhUXvmvKT2dFA/j7B+riVLUt9Q6RKJlcppojL5CoRywJJKLDYnRlw0gTFKfgDPHP5E04UoB71SxoJlVZy8FA==", "license": "BSD-3-Clause" }, + "node_modules/dotenv": { + "version": "16.5.0", + "resolved": "https://registry.npmjs.org/dotenv/-/dotenv-16.5.0.tgz", + "integrity": "sha512-m/C+AwOAr9/W1UOIZUo232ejMNnJAJtYQjUbHoNTBNTJSvqzzDh7vnrei3o3r3m9blf6ZoDkvcw0VmozNRFJxg==", + "license": "BSD-2-Clause", + "engines": { + "node": ">=12" + }, + 
"funding": { + "url": "https://dotenvx.com" + } + }, "node_modules/dunder-proto": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/dunder-proto/-/dunder-proto-1.0.1.tgz", @@ -1035,12 +1094,6 @@ "node": ">= 6" } }, - "node_modules/fs": { - "version": "0.0.1-security", - "resolved": "https://registry.npmjs.org/fs/-/fs-0.0.1-security.tgz", - "integrity": "sha512-3XY9e1pP0CVEUCdj5BmfIZxRBTSDycnbqhIOGec9QYtmVH2fbLpj86CFWkrNOkt/Fvty4KZG5lTglL9j/gJ87w==", - "license": "ISC" - }, "node_modules/function-bind": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/function-bind/-/function-bind-1.1.2.tgz", @@ -1202,6 +1255,26 @@ "node": ">= 14" } }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, "node_modules/import-fresh": { "version": "3.3.1", "resolved": "https://registry.npmjs.org/import-fresh/-/import-fresh-3.3.1.tgz", @@ -1470,38 +1543,35 @@ } }, "node_modules/puppeteer": { - "version": "24.4.0", - "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-24.4.0.tgz", - "integrity": "sha512-E4JhJzjS8AAI+6N/b+Utwarhz6zWl3+MR725fal+s3UlOlX2eWdsvYYU+Q5bXMjs9eZEGkNQroLkn7j11s2k1Q==", + "version": "22.15.0", + "resolved": "https://registry.npmjs.org/puppeteer/-/puppeteer-22.15.0.tgz", + "integrity": "sha512-XjCY1SiSEi1T7iSYuxS82ft85kwDJUS7wj1Z0eGVXKdtr5g4xnVcbjwxhq5xBnpK/E7x1VZZoJDxpjAOasHT4Q==", "hasInstallScript": true, "license": "Apache-2.0", "dependencies": { - "@puppeteer/browsers": "2.8.0", - "chromium-bidi": "2.1.2", + "@puppeteer/browsers": "2.3.0", "cosmiconfig": "^9.0.0", - 
"devtools-protocol": "0.0.1413902", - "puppeteer-core": "24.4.0", - "typed-query-selector": "^2.12.0" + "devtools-protocol": "0.0.1312386", + "puppeteer-core": "22.15.0" }, "bin": { - "puppeteer": "lib/cjs/puppeteer/node/cli.js" + "puppeteer": "lib/esm/puppeteer/node/cli.js" }, "engines": { "node": ">=18" } }, "node_modules/puppeteer-core": { - "version": "24.4.0", - "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-24.4.0.tgz", - "integrity": "sha512-eFw66gCnWo0X8Hyf9KxxJtms7a61NJVMiSaWfItsFPzFBsjsWdmcNlBdsA1WVwln6neoHhsG+uTVesKmTREn/g==", + "version": "22.15.0", + "resolved": "https://registry.npmjs.org/puppeteer-core/-/puppeteer-core-22.15.0.tgz", + "integrity": "sha512-cHArnywCiAAVXa3t4GGL2vttNxh7GqXtIYGym99egkNJ3oG//wL9LkvO4WE8W1TJe95t1F1ocu9X4xWaGsOKOA==", "license": "Apache-2.0", "dependencies": { - "@puppeteer/browsers": "2.8.0", - "chromium-bidi": "2.1.2", - "debug": "^4.4.0", - "devtools-protocol": "0.0.1413902", - "typed-query-selector": "^2.12.0", - "ws": "^8.18.1" + "@puppeteer/browsers": "2.3.0", + "chromium-bidi": "0.6.3", + "debug": "^4.3.6", + "devtools-protocol": "0.0.1312386", + "ws": "^8.18.0" }, "engines": { "node": ">=18" @@ -1718,25 +1788,54 @@ "b4a": "^1.6.4" } }, + "node_modules/through": { + "version": "2.3.8", + "resolved": "https://registry.npmjs.org/through/-/through-2.3.8.tgz", + "integrity": "sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==", + "license": "MIT" + }, "node_modules/tslib": { "version": "2.8.1", "resolved": "https://registry.npmjs.org/tslib/-/tslib-2.8.1.tgz", "integrity": "sha512-oJFu94HQb+KVduSUQL7wnpmqnfmLsOA/nAh6b6EH0wCEoK0/mPeXU6c3wKDV83MkOuHPRHtSXKKU99IBazS/2w==", "license": "0BSD" }, - "node_modules/typed-query-selector": { - "version": "2.12.0", - "resolved": "https://registry.npmjs.org/typed-query-selector/-/typed-query-selector-2.12.0.tgz", - "integrity": 
"sha512-SbklCd1F0EiZOyPiW192rrHZzZ5sBijB6xM+cpmrwDqObvdtunOHHIk9fCGsoK5JVIYXoyEp4iEdE3upFH3PAg==", - "license": "MIT" + "node_modules/unbzip2-stream": { + "version": "1.4.3", + "resolved": "https://registry.npmjs.org/unbzip2-stream/-/unbzip2-stream-1.4.3.tgz", + "integrity": "sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==", + "license": "MIT", + "dependencies": { + "buffer": "^5.2.1", + "through": "^2.3.8" + } }, "node_modules/undici-types": { - "version": "6.20.0", - "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", - "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", "license": "MIT", "optional": true }, + "node_modules/urlpattern-polyfill": { + "version": "10.0.0", + "resolved": "https://registry.npmjs.org/urlpattern-polyfill/-/urlpattern-polyfill-10.0.0.tgz", + "integrity": "sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==", + "license": "MIT" + }, + "node_modules/uuid": { + "version": "9.0.1", + "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.1.tgz", + "integrity": "sha512-b+1eJOlsR9K8HJpow9Ok3fiWOWSIcIzXodvv0rQjVoOVNpWMpxf1wZNpt4y9h10odCNrqnYp1OBzRktckBe3sA==", + "funding": [ + "https://github.com/sponsors/broofa", + "https://github.com/sponsors/ctavan" + ], + "license": "MIT", + "bin": { + "uuid": "dist/bin/uuid" + } + }, "node_modules/wrap-ansi": { "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", @@ -1761,9 +1860,9 @@ "license": "ISC" }, "node_modules/ws": { - "version": "8.18.1", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.1.tgz", - "integrity": 
"sha512-RKW2aJZMXeMxVpnZ6bck+RswznaxmzdULiBr6KY7XkTnW8uvt0iT9H5DkHUChXrc+uurzwa0rVI16n/Xzjdz1w==", + "version": "8.18.2", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.18.2.tgz", + "integrity": "sha512-DMricUmwGZUVr++AEAe2uiVM7UoO9MAVZMDu05UQOaUII0lp+zOzLLU4Xqh/JvTqklB1T4uELaaPBKyjE1r4fQ==", "license": "MIT", "engines": { "node": ">=10.0.0" @@ -1828,9 +1927,9 @@ } }, "node_modules/zod": { - "version": "3.24.2", - "resolved": "https://registry.npmjs.org/zod/-/zod-3.24.2.tgz", - "integrity": "sha512-lY7CDW43ECgW9u1TcT3IoXHflywfVqDYze4waEz812jR/bZ8FHDsl7pFQoSZTz5N+2NqRXs8GBwnAwo3ZNxqhQ==", + "version": "3.23.8", + "resolved": "https://registry.npmjs.org/zod/-/zod-3.23.8.tgz", + "integrity": "sha512-XBx9AXhXktjUqnepgTiE5flcKIYWi/rme0Eaj+5Y0lftuGBq+jyRu/md4WnuxqgP1ubdpNCsYEYPxrzVHD8d6g==", "license": "MIT", "funding": { "url": "https://github.com/sponsors/colinhacks" diff --git a/package.json b/package.json index 04ac55d..c90d81a 100644 --- a/package.json +++ b/package.json @@ -1,18 +1,24 @@ { - "name": "sunnymh-scrap", + "name": "sunnymh-scraper", "version": "1.0.0", - "main": "index.cjs", + "description": "Manga scraper", + "main": "src/index.js", "scripts": { - "start": "node index.cjs", + "start": "node src/index.js", "test": "echo \"Error: no test specified\" && exit 1" }, + "keywords": [ + "manga", + "scraper", + "web-scraping" + ], "author": "", "license": "ISC", - "description": "", "dependencies": { - "axios": "^1.8.4", - "fs": "^0.0.1-security", - "puppeteer": "^24.4.0", - "sharp": "^0.33.5" + "axios": "^1.6.7", + "dotenv": "^16.5.0", + "puppeteer": "^22.0.0", + "sharp": "^0.33.2", + "uuid": "^9.0.1" } } diff --git a/scraper.js b/scraper.js deleted file mode 100644 index cc0936a..0000000 --- a/scraper.js +++ /dev/null @@ -1,81 +0,0 @@ -const puppeteer = require('puppeteer'); - -exports.BaseScraper = class BaseScraper { - constructor() { - this.browser = null; - this.pages = []; - this.pages_response = []; - } - - async init() { - this.browser = await 
puppeteer.launch({ - // headless: false, - args: ['--no-sandbox', '--disable-setuid-sandbox'] - }); - } - - async openPage(url) { - const page = await this.browser.newPage(); - - page.on('response', async (response) => { - const pageIndex = this.pages.indexOf(page); - - if (response.url().includes('https://res.colamanga.com')) { - this.pages_response[pageIndex] = await response.buffer(); - } - else if (/blob:https:\/\/www\.colamanga\.com\//.test(response.url())) { - if (!this.pages_response[pageIndex]) { - this.pages_response[pageIndex] = {}; - } - this.pages_response[pageIndex][response.url()] = await response.buffer(); - } - - }); - - await page.goto(url, { waitUntil: 'domcontentloaded' }); - this.pages.push(page); - return page; - } - - - async closePage(page) { - const pageIndex = this.pages.indexOf(page); - if (pageIndex !== -1) { - this.pages.splice(pageIndex, 1); - this.pages_response.splice(pageIndex, 1); - } - await page.close(); - } - - async closeAllPages() { - await Promise.all(this.pages.map(page => page.close())); - this.pages = []; - this.pages_response = []; - await this.browser.close(); - } - - async loadPage(url) { - const page = await this.openPage(url); - return page; - } - - async scrollPage(page) { - await page.evaluate(async () => { - window.scrollTo(0, 0); - await new Promise((resolve) => { - const distance = 100; // distance to scroll - const delay = 100; // delay between scrolls - const scrollInterval = setInterval(() => { - window.scrollBy(0, distance); - if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight) { - clearInterval(scrollInterval); - resolve(); - } - }, delay); - }); - }); - } - -} - - diff --git a/src/config/config.js b/src/config/config.js new file mode 100644 index 0000000..ec4dd7a --- /dev/null +++ b/src/config/config.js @@ -0,0 +1,81 @@ +/** + * Configuration module for the manga scraper + */ +class Config { + constructor() { + this.loadConfig(); + } + + /** + * Load configuration from environment 
variables with defaults + */ + loadConfig() { + this.baseDir = process.env.MANGA_BASE_DIR; + this.apiBaseUrl = process.env.API_URL; + this.mangaUrlsPath = process.env.MANGA_URLS_PATH + + // API endpoints + this.endpoints = { + insertMangaInfo: `${this.apiBaseUrl}/manga/insertMangaInfo`, + insertMangaChapter: `${this.apiBaseUrl}/manga/insertMangaChapter`, + insertMangaGenres: `${this.apiBaseUrl}/manga/insertMangaGenres`, + insertGenre: `${this.apiBaseUrl}/genre/insertGenre`, + getGenreId: `${this.apiBaseUrl}/genre/getGenreId`, + getMangaId: `${this.apiBaseUrl}/manga/getMangaId` + }; + + // Scraper settings + this.scraper = { + headless: process.env.SCRAPER_HEADLESS === 'true' ? true : false, + timeout: parseInt(process.env.SCRAPER_TIMEOUT, 10), + retryAttempts: parseInt(process.env.SCRAPER_RETRY_ATTEMPTS, 10), + retryDelay: parseInt(process.env.SCRAPER_RETRY_DELAY, 10) + }; + + // Download settings + this.download = { + imageQuality: parseInt(process.env.IMAGE_QUALITY, 10), + maxConcurrentDownloads: parseInt(process.env.MAX_CONCURRENT_DOWNLOADS, 10), + downloadDelay: parseInt(process.env.DOWNLOAD_DELAY, 10) + }; + } + + /** + * Get API endpoint URL + * @param {string} endpoint - Endpoint name + * @returns {string} Full API URL + */ + getApiUrl(endpoint) { + if (!this.endpoints[endpoint]) { + throw new Error(`Unknown API endpoint: ${endpoint}`); + } + return this.endpoints[endpoint]; + } + + /** + * Get scraper setting + * @param {string} setting - Setting name + * @returns {any} Setting value + */ + getScraperSetting(setting) { + if (!(setting in this.scraper)) { + throw new Error(`Unknown scraper setting: ${setting}`); + } + return this.scraper[setting]; + } + + /** + * Get download setting + * @param {string} setting - Setting name + * @returns {any} Setting value + */ + getDownloadSetting(setting) { + if (!(setting in this.download)) { + throw new Error(`Unknown download setting: ${setting}`); + } + return this.download[setting]; + } +} + +// Export singleton 
instance +module.exports = new Config(); \ No newline at end of file diff --git a/src/downloaders/MangaDownloader.js b/src/downloaders/MangaDownloader.js new file mode 100644 index 0000000..3b5b659 --- /dev/null +++ b/src/downloaders/MangaDownloader.js @@ -0,0 +1,134 @@ +const fs = require('fs').promises; +const path = require('path'); +const helpers = require('../utils/helpers'); +const ColaMangaScraper = require('../scrapers/ColaMangaScraper'); +const MangaPoster = require('../posters/MangaPoster'); +const config = require('../config/config'); + +/** + * Main class for downloading manga + */ +class MangaDownloader { + constructor() { + this.outputDir = config.baseDir; + this.maxConcurrentManga = config.getDownloadSetting('maxConcurrentDownloads'); + this.mangaPoster = new MangaPoster(); + } + + /** + * Initialize the downloader + * @returns {Promise} + */ + async init_colamanga_scraper() { + this.mangaUrls = await helpers.readJsonFile(config.mangaUrlsPath) || {}; + this.scraper = new ColaMangaScraper(); + await this.scraper.init(); + } + + /** + * Download a single manga with memory management + * @param {string} mangaName - Manga name + * @param {string} mangaUrl - Manga URL + * @returns {Promise} + */ + async downloadManga(mangaName, mangaUrl) { + console.log(`Getting Manga: ${mangaName} info`); + + try { + const mangaDetails = await this.scraper.getMangaInfo(mangaUrl); + + // // Save manga info + // const infoPath = path.join(this.outputDir, 'info.json'); + // await fs.writeFile(infoPath, JSON.stringify(mangaDetails, null, 2)); + + // Load manga info + // const mangaDetails = await helpers.readJsonFile(path.join(this.outputDir, 'info.json')); + + const mangaDir = path.join(this.outputDir, mangaName); + await helpers.ensureDirectory(mangaDir); + + // Save cover image + if (mangaDetails.coverPic && await helpers.fileExists(path.join(mangaDir, 'cover.jpg')) == false) { + const coverPath = path.join(mangaDir, 'cover.jpg'); + await fs.writeFile(coverPath, 
mangaDetails.coverPic); + mangaDetails.coverPic = null; + } + + const { chapters, coverPic, genres, ...mangaInfo } = mangaDetails; + let response = await this.mangaPoster.getMangaId(mangaDetails.mangaName); + + if (response.success == true) { + mangaDetails.mangaId = response.mangaId; + } + else { + mangaDetails.mangaId = helpers.generateId(); + this.mangaPoster.insertMangaInfo({mangaId: mangaDetails.mangaId, ...mangaInfo}); + this.mangaPoster.insertMangaGenres(mangaDetails.mangaId, genres); + } + + // Download chapters in sequence to manage memory + for (const chapter of chapters) { + if (await helpers.isDirectoryNotEmpty(path.join(mangaDir, chapter.chapterName))) { + console.log(`Skipping Manga: ${mangaDetails.mangaName}, Chapter: ${chapter.order} - ${chapter.chapterName} as it already exists`); + continue; + } + console.log(`Downloading Manga: ${mangaDetails.mangaName}, Chapter: ${chapter.order} - ${chapter.chapterName}`); + const chapterDir = path.join(mangaDir, chapter.chapterName); + await helpers.ensureDirectory(chapterDir); + await this.scraper.downloadChapterPics(chapter, chapterDir); + await this.mangaPoster.insertMangaChapter({ + mangaId: mangaDetails.mangaId, + chapterName: chapter.chapterName, + chapterOrder: chapter.order + }); + await this.scraper.cleanup(); + + // Add smart delay between chapter downloads + const delay = helpers.getSmartDelay(); + console.log(`Waiting ${helpers.formatDelay(delay)} before next chapter...`); + await new Promise(resolve => setTimeout(resolve, delay)); + } + + console.log(`Completed downloading: ${mangaName}`); + } catch (error) { + throw new Error(`Failed to download manga ${mangaName}: ${error.message}`); + } + } + + /** + * Run the downloader with memory management + * @returns {Promise} Download results + */ + async run() { + const results = { + successful: [], + failed: [] + }; + + try { + // Process manga sequentially to manage memory + for (const [mangaName, mangaUrl] of Object.entries(this.mangaUrls)) { + try { 
+ await this.downloadManga(mangaName, mangaUrl); + results.successful.push(mangaName); + + // Clean up after each manga + await this.scraper.cleanup(); + + // Force garbage collection + if (global.gc) { + global.gc(); + } + } catch (error) { + results.failed.push({ name: mangaName, error: error.message }); + } + } + } finally { + await this.scraper.close(); + } + + return results; + } +} + +module.exports = MangaDownloader; diff --git a/src/index.js b/src/index.js new file mode 100644 index 0000000..68da93d --- /dev/null +++ b/src/index.js @@ -0,0 +1,24 @@ +require('dotenv').config(); +const MangaDownloader = require('./downloaders/MangaDownloader'); + +/** + * Main entry point for the manga scraper + */ +async function main() { + try { + const downloader = new MangaDownloader(); + await downloader.init_colamanga_scraper(); + const results = await downloader.run(); + + // Exit with error code if any downloads failed + if (results.failed.length > 0) { + process.exit(1); + } + } catch (error) { + console.error('Fatal error:', error); + process.exit(1); + } +} + +// Run the main function +main(); \ No newline at end of file diff --git a/src/posters/MangaPoster.js b/src/posters/MangaPoster.js new file mode 100644 index 0000000..eeccc00 --- /dev/null +++ b/src/posters/MangaPoster.js @@ -0,0 +1,72 @@ +const config = require('../config/config'); + +class MangaPoster { + + async getMangaId(mangaName) { + const response = await fetch(config.getApiUrl('getMangaId') + '/' + mangaName, { + method: 'GET', + }); + return response.json(); + } + + async getGenreId(genreName) { + const response = await fetch(config.getApiUrl('getGenreId') + '/' + genreName, { + method: 'GET', + }); + return response.json(); + } + + async insertMangaInfo(mangaInfo) { + const response = await fetch(config.getApiUrl('insertMangaInfo'), { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify(mangaInfo) + }); + return response.json(); + } + + async 
insertMangaGenres(mangaId, genres) { + for (const genre of genres) { + let genreId; + let response = await this.getGenreId(genre); + if (response.success == false) { + response = await this.insertGenre({genreName: genre}); + } + genreId = response.genreId; + + response = await fetch(config.getApiUrl('insertMangaGenres'), { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({mangaId, genreId}) + }); + } + } + + async insertGenre(genre) { + const response = await fetch(config.getApiUrl('insertGenre'), { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify(genre) + }); + return response.json(); + } + + async insertMangaChapter(mangaChapter) { + const response = await fetch(config.getApiUrl('insertMangaChapter'), { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify(mangaChapter) + }); + return response.json(); + } +} + +module.exports = MangaPoster; \ No newline at end of file diff --git a/src/scrapers/BaseScraper.js b/src/scrapers/BaseScraper.js new file mode 100644 index 0000000..2b4138d --- /dev/null +++ b/src/scrapers/BaseScraper.js @@ -0,0 +1,97 @@ +const puppeteer = require('puppeteer'); +const config = require('../config/config'); + +/** + * Base class for manga scrapers + * Provides common functionality for browser management and page handling + */ +class BaseScraper { + constructor() { + this.browser = null; + this.page = null; + this.page_response = {}; + } + + /** + * Initialize the browser + * @returns {Promise} + */ + async init() { + this.browser = await puppeteer.launch({ + headless: config.getScraperSetting('headless'), + args: ['--no-sandbox', '--disable-setuid-sandbox'] + }); + this.page = (await this.browser.pages())[0]; + + // Set up response interception + this.page.on('response', async (response) => { + if (this.shouldInterceptResponse(response)) { + if(this.isCoverPic(response)) { + this.page_response['cover'] = 
await response.buffer(); + } + else{ + this.page_response[response.url()] = await response.buffer(); + } + } + }); + console.log("Browser initialized"); + } + + /** + * Check if a response should be intercepted + * @param {Response} response - Puppeteer response object + * @returns {boolean} Whether to intercept the response + */ + shouldInterceptResponse(response) { + return false; // Override in child classes + } + + /** + * Navigate to a URL + * @param {string} url - URL to navigate to + * @returns {Promise} + */ + async navigateTo(url) { + await this.page.goto(url, { waitUntil: 'domcontentloaded' }); + } + + /** + * Close the browser + * @returns {Promise} + */ + async close() { + if (this.page) { + await this.page.close(); + this.page = null; + } + if (this.browser) { + await this.browser.close(); + this.browser = null; + } + this.page_response = {}; + } + + /** + * Scroll the page smoothly to the bottom + * @returns {Promise} + */ + async scrollPage() { + await this.page.evaluate(async () => { + window.scrollTo(0, 0); + + await new Promise((resolve) => { + const distance = 100; + const delay = 100; + const scrollInterval = setInterval(() => { + window.scrollBy(0, distance); + if ((window.innerHeight + window.scrollY) >= document.body.offsetHeight) { + clearInterval(scrollInterval); + resolve(); + } + }, delay); + }); + }); + } +} + +module.exports = BaseScraper; \ No newline at end of file diff --git a/src/scrapers/ColaMangaScraper.js b/src/scrapers/ColaMangaScraper.js new file mode 100644 index 0000000..6026d32 --- /dev/null +++ b/src/scrapers/ColaMangaScraper.js @@ -0,0 +1,258 @@ +const BaseScraper = require('./BaseScraper'); +const sharp = require('sharp'); +const fs = require('fs').promises; +const path = require('path'); +const config = require('../config/config'); + +/** + * Scraper implementation for colamanga.com + */ +class ColaMangaScraper extends BaseScraper { + constructor() { + super(); + this.page_response = new Map(); // Use Map instead of 
object for better memory management + this.maxConcurrentDownloads = config.getDownloadSetting('maxConcurrentDownloads'); + this.downloadQueue = []; + } + + /** + * Check if a response should be intercepted + * @param {Response} response - Puppeteer response object + * @returns {boolean} Whether to intercept the response + */ + shouldInterceptResponse(response) { + return response.url().includes('https://res.colamanga.com') || + /blob:https:\/\/www\.colamanga\.com\//.test(response.url()); + } + + isCoverPic(response) { + if(response.url().includes('https://res.colamanga.com')) { + return true; + } + return false; + } + + /** + * Extract basic manga information from the page + * @returns {Promise} Basic manga info + */ + async extractBasicInfo() { + const mangaName = await this.page.$eval('.fed-deta-content h1', el => el.textContent.trim()); + return { + mangaName: mangaName, + mangaAuthor: '', + mangaStatus: '', + mangaNickname: [], + genres: [], + chapters: [] + }; + } + + /** + * Extract detailed manga information + * @param {Object} mangaInfo - Basic manga info to extend + * @returns {Promise} + */ + async extractDetailedInfo(mangaInfo) { + const elements = await this.page.$$('.fed-deta-content li'); + const infoMap = { + '状态': 'mangaStatus', + '作者': 'mangaAuthor', + '别名': 'mangaNickname', + '类别': 'genres' + }; + + for (const el of elements) { + const span = await this.page.evaluate(e => e.querySelector('span')?.textContent.trim(), el); + const field = infoMap[span]; + if (!field) continue; + + if (field === 'mangaNickname' || field === 'genres') { + mangaInfo[field] = await this.page.evaluate(e => { + return Array.from(e.querySelectorAll('a')).map(a => a.textContent.trim()); + }, el); + } else { + mangaInfo[field] = await this.page.evaluate(e => e.querySelector('a')?.textContent.trim(), el); + } + } + } + + /** + * Extract chapter information from the page + * @param {string} mangaUrl - Base manga URL + * @returns {Promise} Array of chapter objects + */ + async 
extractChapterInfo(mangaUrl) { + const chapterElements = await this.page.$$('.all_data_list li'); + return Promise.all(chapterElements.reverse().map(async (el, index) => { + const chapterName = await el.$eval('a', el => el.textContent.trim()); + const chapterUrl = await el.$eval('a', el => el.getAttribute('href')); + return { + chapterName: chapterName, + url: new URL(chapterUrl, mangaUrl).href, + order: index + 1 + }; + })); + } + + /** + * Wait for cover picture to be loaded + * @returns {Promise} Cover image buffer + */ + async waitForCoverPicture() { + const maxWaitTime = config.getScraperSetting('timeout'); + const startTime = Date.now(); + + while (Date.now() - startTime < maxWaitTime) { + if (this.page_response['cover']) { + return this.page_response['cover']; + } + await new Promise(resolve => setTimeout(resolve, config.getDownloadSetting('downloadDelay'))); + } + throw new Error('Timeout waiting for cover picture'); + } + + /** + * Get complete manga information + * @param {string} mangaUrl - Manga URL + * @returns {Promise} Complete manga information + */ + async getMangaInfo(mangaUrl) { + try { + await this.navigateTo(mangaUrl); + await this.page.waitForSelector('.fed-deta-info', { visible: true }); + + const mangaInfo = await this.extractBasicInfo(); + await this.extractDetailedInfo(mangaInfo); + mangaInfo.chapters = await this.extractChapterInfo(mangaUrl); + mangaInfo.coverPic = await this.waitForCoverPicture(); + return mangaInfo; + } catch (error) { + throw new Error(`Failed to get manga info: ${error.message}`); + } + } + + /** + * Download chapter images + * @param {Object} chapter - Chapter information + * @param {string} chapterDir - Chapter directory + * @returns {Promise} + */ + async downloadChapterPics(chapter, chapterDir = ".") { + try { + await this.navigateTo(chapter.url); + await this.page.waitForSelector('.mh_mangalist', { visible: true }); + await this.scrollPage(); + await this.saveImages(chapterDir); + this.page_response.clear(); + } 
catch (error) { + throw new Error(`Failed to download chapter ${chapter.name}: ${error.message}`); + } + } + + /** + * Wait for all chapter images to load + * @returns {Promise} Total number of images + */ + async saveImages(chapterDir) { + const loadingElements = await this.page.$$('.mh_comicpic'); + const reversedElements = loadingElements.reverse(); + + let totalImages = 0; + for (const element of reversedElements) { + const pValue = await this.page.evaluate(el => el.getAttribute('p'), element); + + await this.page.evaluate(el => { + const rect = el.getBoundingClientRect(); + window.scrollTo({ + top: window.scrollY + rect.top - (window.innerHeight / 2), + behavior: 'smooth' + }); + }, element); + await new Promise(resolve => setTimeout(resolve, 1000)); + + const loadingDiv = await element.$('.mh_loading'); + let isVisible = true; + if (loadingDiv) { + isVisible = await this.page.evaluate(el => { + return el.offsetParent !== null; + }, loadingDiv); + } + + if (isVisible) { + if (totalImages == 0) { + totalImages = pValue; + } + const imgDiv = await element.$('img'); + const imgSrc = await this.page.evaluate(el => el.src, imgDiv); + while (!this.page_response[imgSrc]) { + await new Promise(resolve => setTimeout(resolve, 100)); + } + await this.saveImage(imgSrc, this.page_response[imgSrc], chapterDir); + } + } + return totalImages; + } + + /** + * Get image order from page + * @param {string} src - Image source URL + * @returns {Promise} Image order + */ + async getImgOrder(src) { + const loadingAttributes = await this.page.$$eval('.mh_comicpic', (elements, src) => { + return elements + .filter(el => el.querySelector(`img[src="${src}"]`)) + .map(el => el.getAttribute('p')) + .map(str => str.padStart(elements.length.toString().length, '0')); + }, src); + return loadingAttributes[0]; + } + + /** + * Process and save a single image + * @param {string} url - Image URL + * @param {Buffer} buffer - Image buffer + * @param {string} chapterDir - Chapter directory + * 
/**
 * Utility functions for the manga scraper
 */
class Helpers {
    /**
     * Sanitize a string to be used as a filename
     * Surrounding whitespace is removed first; each invalid character and
     * each remaining whitespace run then becomes an underscore.
     * @param {string} str - String to sanitize
     * @returns {string} Sanitized string
     */
    static sanitizeFileName(str) {
        const invalidChars = /[<>:"/\\|?*\x00-\x1F]/g;
        // Trim before replacing: once whitespace has become '_' there is
        // nothing left for trim() to remove.
        return str.trim()
            .replace(invalidChars, '_')
            .replace(/\s+/g, '_');
    }

    /**
     * Generate a smart delay based on time of day
     * @param {Date} [now] - Moment whose local hour selects the delay band
     *     (defaults to the current time)
     * @returns {number} Delay in milliseconds
     */
    static getSmartDelay(now = new Date()) {
        const MINUTE = 60 * 1000;
        const hour = now.getHours();
        let minDelay;
        let maxDelay;

        if (hour < 6) {
            // Midnight to 6 AM: 20-30 minutes
            minDelay = 20 * MINUTE;
            maxDelay = 30 * MINUTE;
        } else if (hour >= 18) {
            // 6 PM to midnight: 5-8 minutes
            minDelay = 5 * MINUTE;
            maxDelay = 8 * MINUTE;
        } else {
            // Day time: 10-12 minutes
            minDelay = 10 * MINUTE;
            maxDelay = 12 * MINUTE;
        }

        return minDelay + Math.random() * (maxDelay - minDelay);
    }

    /**
     * Format delay time for logging
     * @param {number} delay - Delay in milliseconds
     * @returns {string} Formatted delay string
     */
    static formatDelay(delay) {
        const minutes = Math.floor(delay / 60000);
        const seconds = Math.floor((delay % 60000) / 1000);
        return `${minutes} minutes and ${seconds} seconds`;
    }

    /**
     * Generate a unique ID
     * @returns {string} UUID v4
     */
    static generateId() {
        return uuidv4();
    }

    /**
     * Ensure directory exists (missing parent directories are created too)
     * @param {string} dirPath - Directory path
     * @returns {Promise<void>}
     * @throws {Error} When the directory cannot be created
     */
    static async ensureDirectory(dirPath) {
        try {
            await fs.promises.mkdir(dirPath, { recursive: true });
        } catch (error) {
            throw new Error(`Failed to create directory ${dirPath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Read JSON file
     * @param {string} filePath - Path to JSON file
     * @returns {Promise<*>} Parsed JSON data
     * @throws {Error} When the file cannot be read or parsed
     */
    static async readJsonFile(filePath) {
        try {
            const data = await fs.promises.readFile(filePath, 'utf8');
            return JSON.parse(data);
        } catch (error) {
            throw new Error(`Failed to read JSON file ${filePath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Write JSON file (pretty-printed with two-space indentation)
     * @param {string} filePath - Path to JSON file
     * @param {Object} data - Data to write
     * @returns {Promise<void>}
     * @throws {Error} When the data cannot be serialized or written
     */
    static async writeJsonFile(filePath, data) {
        try {
            await fs.promises.writeFile(filePath, JSON.stringify(data, null, 2));
        } catch (error) {
            throw new Error(`Failed to write JSON file ${filePath}: ${error.message}`, { cause: error });
        }
    }

    /**
     * Check if file exists
     * @param {string} filePath - Path to file
     * @returns {Promise<boolean>} True when the path is accessible
     */
    static async fileExists(filePath) {
        try {
            await fs.promises.access(filePath);
            return true;
        } catch {
            return false;
        }
    }

    /**
     * Get file extension from URL
     * @param {string} url - URL to get extension from
     * @returns {string} Lower-cased extension including the dot; '.webp'
     *     when the URL has none or cannot be parsed
     */
    static getFileExtension(url) {
        try {
            const { pathname } = new URL(url);
            const extension = path.extname(pathname).toLowerCase();
            return extension || '.webp'; // Default to .webp if no extension found
        } catch {
            return '.webp';
        }
    }

    /**
     * Check if directory exists and is not empty
     * @param {string} dirPath - Directory path to check
     * @returns {Promise<boolean>} True if directory exists and is not empty, false otherwise
     */
    static async isDirectoryNotEmpty(dirPath) {
        try {
            const stats = await fs.promises.stat(dirPath);
            if (!stats.isDirectory()) {
                return false;
            }
            const files = await fs.promises.readdir(dirPath);
            return files.length > 0;
        } catch {
            // A missing or unreadable path counts as "not a non-empty directory".
            return false;
        }
    }
}