""" Manga toolkit — download from m.happymh.com, upload to Cloudflare R2. Usage: python manga.py """ import io import json import os import re import time import socket import subprocess from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path from urllib.parse import urlparse import boto3 import psycopg2 from PIL import Image from dotenv import load_dotenv from playwright.sync_api import sync_playwright from simple_term_menu import TerminalMenu load_dotenv() # ── Config ───────────────────────────────────────────────── BASE_URL = "https://m.happymh.com" ROOT_DIR = Path(__file__).parent CONTENT_DIR = ROOT_DIR / "manga-content" MANGA_JSON = ROOT_DIR / "manga.json" BROWSER_DATA = ROOT_DIR / ".browser-data" CDP_PORT = 9333 REQUEST_DELAY = 1.5 UPLOAD_WORKERS = 8 CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" # R2 s3 = boto3.client( "s3", endpoint_url=f"https://{os.environ['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com", aws_access_key_id=os.environ["R2_ACCESS_KEY"], aws_secret_access_key=os.environ["R2_SECRET_KEY"], region_name="auto", ) BUCKET = os.environ["R2_BUCKET"] PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/") DATABASE_URL = os.environ["DATABASE_URL"] # ── Chrome management ────────────────────────────────────── def hide_chrome(): """Hide Chrome window on macOS.""" try: subprocess.Popen( ["osascript", "-e", 'tell application "System Events" to set visible of process "Google Chrome" to false'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) except Exception: pass def is_port_open(port): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: return s.connect_ex(("localhost", port)) == 0 def launch_chrome(start_url=None): if is_port_open(CDP_PORT): return None if not Path(CHROME_PATH).exists(): print(f" Chrome not found at: {CHROME_PATH}") return None cmd = [ CHROME_PATH, f"--remote-debugging-port={CDP_PORT}", f"--user-data-dir={BROWSER_DATA}", "--no-first-run", "--no-default-browser-check", "--window-position=0,0", "--window-size=800,600", "--no-focus-on-navigate", ] if start_url: cmd.append(start_url) proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) for _ in range(30): if is_port_open(CDP_PORT): time.sleep(1) hide_chrome() return proc time.sleep(0.5) print(" Chrome failed to start") return None class BrowserSession: """Manages Chrome + CDP lifecycle.""" def __init__(self): self.chrome_proc = None self.playwright = None self.browser = None self.page = None def start(self): self.chrome_proc = launch_chrome() self.playwright = sync_playwright().start() self.browser = self.playwright.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}") context = self.browser.contexts[0] self.page = context.pages[0] if context.pages else context.new_page() def close(self): try: self.browser.close() except Exception: pass if self.chrome_proc: self.chrome_proc.terminate() if self.playwright: self.playwright.stop() def with_browser(func): """Run func(session) inside a Chrome+CDP session. Returns func's result.""" session = BrowserSession() session.start() try: return func(session) finally: session.close() # ── Cloudflare ───────────────────────────────────────────── def wait_for_cloudflare(session, timeout=120): """Wait for CF to resolve. User solves in the visible browser window.""" page = session.page for i in range(timeout): try: title = page.title() except Exception: time.sleep(1) continue if "Just a moment" in title or "challenge" in page.url: if i == 0: print(" CF challenge — solve in browser...") elif i % 15 == 0: print(f" Still waiting for CF... ({i}s)") time.sleep(1) continue if title and ("嗨皮漫画" in title or "happymh" in page.url): return True time.sleep(1) print(" CF timed out.") return False # ── Happymh: chapter fetching ───────────────────────────── def fetch_chapters_via_api(page, slug): result = page.evaluate(""" async (slug) => { const all = []; let total = 0; for (let p = 1; p <= 30; p++) { const url = `/v2.0/apis/manga/chapterByPage?code=${slug}&lang=cn&order=asc&page=${p}&_t=${Date.now()}`; try { const ctrl = new AbortController(); setTimeout(() => ctrl.abort(), 10000); const r = await fetch(url, { signal: ctrl.signal }); if (!r.ok) { if (p === 1) return { error: r.status }; break; } const json = await r.json(); if (!json.data) break; total = json.data.total || total; let items = null; for (const val of Object.values(json.data)) { if (Array.isArray(val) && val.length > 0) { items = val; break; } } if (!items || items.length === 0) break; for (const ch of items) { all.push({ id: String(ch.id || ''), chapterName: ch.chapterName || ch.name || '' }); } if (total && all.length >= total) break; } catch (e) { if (p === 1) return { error: e.message }; break; } } return { chapters: all, total }; } """, slug) if result and result.get("chapters") and len(result["chapters"]) > 0: chapters = result["chapters"] total = result.get("total", len(chapters)) print(f" API: {len(chapters)}/{total} chapters") return chapters if result and result.get("error"): print(f" API error: {result['error']}") return None def fetch_chapters_from_dom(page): try: page.wait_for_selector("a[href*='/mangaread/']", timeout=15000) page.wait_for_timeout(1000) except Exception: return None for selector in ["text=展开全部", "text=查看全部", "text=全部章节", "text=展开更多", "text=更多"]: try: btn = page.query_selector(selector) if btn and btn.is_visible(): btn.click() page.wait_for_timeout(2000) break except Exception: continue try: page.wait_for_selector(".MuiDrawer-paper", timeout=5000) except Exception: pass try: sort_btn = page.query_selector("text=点我改变排序") if sort_btn and sort_btn.is_visible(): sort_btn.click() page.wait_for_timeout(2000) except Exception: pass total = page.evaluate(""" () => { const spans = document.querySelectorAll('.MuiDrawer-paper span'); for (const s of spans) { const m = s.textContent.match(/共(\\d+)个章节/); if (m) return parseInt(m[1]); } return 0; } """) for _ in range(50): count = page.evaluate("document.querySelectorAll('.MuiDrawer-paper a[href*=\"/mangaread/\"]').length") if total and count >= total: break clicked = page.evaluate(""" () => { const walker = document.createTreeWalker( document.querySelector('.MuiDrawer-paper') || document.body, NodeFilter.SHOW_TEXT ); while (walker.nextNode()) { if (walker.currentNode.textContent.includes('加载更多')) { let el = walker.currentNode.parentElement; while (el && el.tagName !== 'LI') el = el.parentElement; if (el) { el.click(); return true; } walker.currentNode.parentElement.click(); return true; } } return false; } """) if not clicked: break page.wait_for_timeout(1000) chapters = page.evaluate(""" () => { const container = document.querySelector('.MuiDrawer-paper') || document; const links = container.querySelectorAll('a[href*="/mangaread/"]'); const chapters = [], seen = new Set(); links.forEach(a => { const match = a.getAttribute('href').match(/\\/mangaread\\/[^/]+\\/(\\d+)/); if (match && !seen.has(match[1])) { seen.add(match[1]); const name = a.textContent.trim(); if (name && name !== '开始阅读') chapters.push({ id: match[1], chapterName: name }); } }); return chapters; } """) try: page.keyboard.press("Escape") except Exception: pass return chapters if chapters else None # ── Happymh: metadata & cover ───────────────────────────── def fetch_metadata(page): html_text = page.content() metadata = {"mg-url": page.url} m = re.search(r'

(.*?)

', html_text) if m: metadata["mg-title"] = m.group(1).strip() m = re.search(r'

.*?]*>(.*?)', html_text, re.DOTALL) if m: metadata["mg-author"] = m.group(1).strip() genre_matches = re.findall(r'

.*?

', html_text, re.DOTALL) if genre_matches: metadata["mg-genres"] = re.findall(r']*>(.*?)', genre_matches[0]) m = re.search(r'
.*?]*>(.*?)

', html_text, re.DOTALL) if m: metadata["mg-description"] = m.group(1).strip() if not metadata.get("mg-description"): m = re.search(r']*>(.*?)', html_text, re.DOTALL) if m: desc = re.sub(r'<[^>]+>', '', m.group(1)).strip() if desc: metadata["mg-description"] = desc cover_url = page.evaluate(""" () => { const og = document.querySelector('meta[property="og:image"]'); if (og) return og.content; for (const sel of ['img.mg-cover', 'img[src*="mcover"]']) { const img = document.querySelector(sel); if (img && img.src) return img.src; } return null; } """) if cover_url: metadata["mg-cover"] = cover_url return metadata # ── Happymh: image download ─────────────────────────────── def _try_get_chapter_images(session, slug, chapter_id): """Single attempt to get chapter images. Returns (images, api_status).""" captured_images = [] api_info = {"found": False, "status": None, "error": None} def on_response(response): if "/apis/manga/reading" not in response.url: return # Only capture our chapter, skip prefetched ones if f"cid={chapter_id}" not in response.url and f"cid%3D{chapter_id}" not in response.url: return # Ignore if we already captured images (prevent duplicate/prefetch) if captured_images: return api_info["found"] = True api_info["status"] = response.status if response.status != 200: api_info["error"] = f"status {response.status}" return try: data = response.json() # Verify chapter ID in response body resp_cid = str(data.get("data", {}).get("id", "")) if resp_cid and resp_cid != str(chapter_id): return scans = data.get("data", {}).get("scans", []) if isinstance(scans, str): scans = json.loads(scans) for scan in scans: if isinstance(scan, dict) and "url" in scan: captured_images.append({ "url": scan["url"], "no_referrer": scan.get("r", 0) != 0, }) except Exception as e: api_info["error"] = str(e) page = session.page page.on("response", on_response) reader_url = f"{BASE_URL}/mangaread/{slug}/{chapter_id}" print(" Loading reader...") try: page.evaluate(f"window.location.href = '{reader_url}'") except Exception: pass hide_chrome() time.sleep(2) try: page.evaluate("window.close = () => {}") except Exception: pass print(" Waiting for page...") if not wait_for_cloudflare(session, timeout=90): page = session.page try: page.remove_listener("response", on_response) except Exception: pass return [], api_info page = session.page print(" Waiting for API...") deadline = time.time() + 20 while time.time() < deadline: if captured_images: break try: page.wait_for_timeout(500) except Exception: break try: page.remove_listener("response", on_response) except Exception: pass if not api_info["found"]: print(" API not intercepted") elif api_info["error"]: print(f" API: {api_info['error']}") # Filter out next-chapter preview images by counting DOM containers if captured_images: try: counts = page.evaluate(""" () => { const all = document.querySelectorAll('[class*="imgContainer"]').length; const next = document.querySelectorAll('[class*="imgNext"]').length; return { all, next, current: all - next }; } """) if counts and counts.get("next", 0) > 0: actual = counts["current"] if 0 < actual < len(captured_images): captured_images = captured_images[:actual] except Exception: pass # DOM fallback if not captured_images: try: page.wait_for_timeout(3000) dom_images = page.evaluate(""" () => { const imgs = document.querySelectorAll('img[src*="http"]'); const nextImgs = new Set( Array.from(document.querySelectorAll('[class*="imgNext"] img')) .map(img => img.src) ); const urls = [], seen = new Set(); imgs.forEach(img => { const src = img.src || ''; if (src && !seen.has(src) && !nextImgs.has(src) && !src.includes('/mcover/') && !src.includes('cloudflare') && !src.includes('.svg')) { seen.add(src); urls.push(src); } }); return urls; } """) if dom_images: print(f" DOM: {len(dom_images)} images") for u in dom_images: captured_images.append({"url": u, "no_referrer": False}) except Exception as e: print(f" DOM failed: {e}") return captured_images, api_info def get_chapter_images(session, slug, chapter_id): """Get chapter images. On API 403 (CF expired), navigate to solve and retry.""" images, api_info = _try_get_chapter_images(session, slug, chapter_id) if images: return images if api_info.get("status") == 403: print(" CF expired — solve in browser...") page = session.page try: page.goto(f"{BASE_URL}/mangaread/{slug}/{chapter_id}", wait_until="commit", timeout=60000) except Exception: pass if wait_for_cloudflare(session, timeout=120): images, _ = _try_get_chapter_images(session, slug, chapter_id) return images def fetch_image_bytes(session, img): """Fetch image via browser network stack, return raw bytes or None.""" page = session.page url = img["url"] ref_policy = "no-referrer" if img.get("no_referrer") else "origin" try: with page.expect_response(lambda r: url in r.url, timeout=15000) as resp_info: page.evaluate("([u, r]) => fetch(u, { referrerPolicy: r })", [url, ref_policy]) response = resp_info.value if response.status == 200: body = response.body() if body and len(body) > 100: return body except Exception as e: if not hasattr(fetch_image_bytes, "_err_logged"): fetch_image_bytes._err_logged = True print(f"\n First error: {e}") return None def download_image(session, img, save_path): """Fetch image and save to disk.""" if save_path.exists(): return True body = fetch_image_bytes(session, img) if body: save_path.parent.mkdir(parents=True, exist_ok=True) save_path.write_bytes(body) return True return False # ── R2 / Upload ──────────────────────────────────────────── WEBP_QUALITY = 75 def _to_webp_bytes(img, quality=WEBP_QUALITY, method=6): buf = io.BytesIO() img.save(buf, format="WEBP", quality=quality, method=method) return buf.getvalue() def convert_to_webp(source, quality=WEBP_QUALITY): return _to_webp_bytes(Image.open(source), quality) def make_cover(source, width=400, height=560): img = Image.open(source) target_ratio = width / height img_ratio = img.width / img.height if img_ratio > target_ratio: new_width = int(img.height * target_ratio) left = (img.width - new_width) // 2 img = img.crop((left, 0, left + new_width, img.height)) else: new_height = int(img.width / target_ratio) img = img.crop((0, 0, img.width, new_height)) img = img.resize((width, height), Image.LANCZOS) return _to_webp_bytes(img, quality=80) def upload_to_r2(key, data, content_type="image/webp"): s3.put_object(Bucket=BUCKET, Key=key, Body=data, ContentType=content_type) return f"{PUBLIC_URL}/{key}" def r2_key_exists(key): try: s3.head_object(Bucket=BUCKET, Key=key) return True except s3.exceptions.ClientError: return False def get_db(): conn = psycopg2.connect(DATABASE_URL) conn.set_client_encoding("UTF8") return conn def parse_chapter_dir(dir_name): m = re.match(r"^(\d+)\s+(.+)$", dir_name) if m: return int(m.group(1)), m.group(2) return 0, dir_name # ── Helpers ──────────────────────────────────────────────── def load_manga_urls(): if not MANGA_JSON.exists(): return [] data = json.loads(MANGA_JSON.read_text(encoding="utf-8")) return data if isinstance(data, list) else [] def slug_from_url(url): return urlparse(url).path.strip("/").split("/")[-1] def get_existing_chapters(manga_dir): existing = set() if manga_dir.exists(): for entry in manga_dir.iterdir(): if entry.is_dir() and any(entry.glob("*.jpg")): existing.add(entry.name) return existing def list_local_manga(): if not CONTENT_DIR.exists(): return [] return sorted(d.name for d in CONTENT_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")) # ── Core: download manga ────────────────────────────────── def load_manga_page(session, slug): """Navigate to manga page, pass CF, return (chapters, metadata, cover_bytes) or None.""" cover_responses = {} def on_cover(response): if "/mcover/" in response.url and response.status == 200: try: cover_responses[response.url] = response.body() except Exception: pass page = session.page page.on("response", on_cover) print(" Loading manga page...") try: page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000) except Exception: pass hide_chrome() if not wait_for_cloudflare(session): page = session.page try: page.remove_listener("response", on_cover) except Exception: pass return None page = session.page # may have changed after CF restart print(" Fetching chapters...") chapters = fetch_chapters_via_api(page, slug) if not chapters: print(" API failed, trying DOM...") chapters = fetch_chapters_from_dom(page) metadata = fetch_metadata(page) # Wait for cover image to be present in DOM (up to 8s) cover_url = None for _ in range(16): cover_url = page.evaluate(""" () => { const sels = ['img.mip-fill-content[src*="mcover"]', 'img[src*="/mcover/"]']; for (const s of sels) { const img = document.querySelector(s); if (img && img.src) return img.src; } return null; } """) if cover_url: break page.wait_for_timeout(500) # Give the response another moment to be captured if cover_url and cover_url not in cover_responses: page.wait_for_timeout(1500) try: page.remove_listener("response", on_cover) except Exception: pass cover_body = None if cover_url: cover_body = cover_responses.get(cover_url) if not cover_body: for url, data in cover_responses.items(): if cover_url.split("?")[0] in url or url.split("?")[0] in cover_url: cover_body = data break if not cover_body: if cover_url: print(f" Cover URL found but body not captured ({len(cover_responses)} responses)") else: print(f" No cover URL found in DOM") return chapters, metadata, cover_body def save_manga_local(slug, metadata, cover_body): """Save metadata and cover to local manga-content/.""" manga_dir = CONTENT_DIR / slug manga_dir.mkdir(parents=True, exist_ok=True) detail_path = manga_dir / "detail.json" if metadata: existing = {} if detail_path.exists(): try: existing = json.loads(detail_path.read_text(encoding="utf-8")) except json.JSONDecodeError: pass existing.update(metadata) detail_path.write_text(json.dumps(existing, ensure_ascii=False, indent=4), encoding="utf-8") cover_path = manga_dir / "cover.jpg" if not cover_path.exists() and cover_body and len(cover_body) > 100: cover_path.write_bytes(cover_body) print(f" Cover saved ({len(cover_body)} bytes)") def download_chapter(session, slug, chapter_index, chapter, manga_dir): """Download a single chapter's images. Returns True if successful.""" ch_id = chapter["id"] ch_name = chapter["chapterName"] folder_name = f"{chapter_index} {ch_name}" chapter_dir = manga_dir / folder_name images = get_chapter_images(session, slug, ch_id) if not images: print(f" No images") return False print(f" {len(images)} pages") chapter_dir.mkdir(parents=True, exist_ok=True) ok = 0 failed = [] for pn, img in enumerate(images, 1): save_path = chapter_dir / f"{pn}.jpg" if download_image(session, img, save_path): ok += 1 print(f" {pn}/{len(images)}", end="\r") else: failed.append((pn, img)) time.sleep(0.1) if failed: time.sleep(1) for pn, img in failed: save_path = chapter_dir / f"{pn}.jpg" if download_image(session, img, save_path): ok += 1 else: print(f" {pn}/{len(images)} FAIL") time.sleep(0.3) print(f" {ok}/{len(images)} downloaded" + " " * 20) if ok == 0: try: chapter_dir.rmdir() except Exception: pass return False time.sleep(REQUEST_DELAY) return True # ── Core: upload manga ──────────────────────────────────── def upload_manga_to_r2(manga_name, conn): """Upload a local manga to R2 and create DB records.""" manga_path = CONTENT_DIR / manga_name detail_path = manga_path / "detail.json" if not detail_path.exists(): print(f" Skipping {manga_name}: no detail.json") return detail = json.loads(detail_path.read_text(encoding="utf-8")) title = detail.get("mg-title", manga_name) slug = manga_name genres = detail.get("mg-genres", []) description = detail.get("mg-description", "") if not description and genres: description = f"Genres: {', '.join(genres)}" genre = genres[0] if genres else "Drama" cur = conn.cursor() # Cover cover_file = manga_path / "cover.jpg" cover_url = "" cover_key = f"manga/{slug}/cover.webp" if cover_file.exists(): if not r2_key_exists(cover_key): cover_url = upload_to_r2(cover_key, make_cover(cover_file)) print(f" Cover uploaded") else: cover_url = f"{PUBLIC_URL}/{cover_key}" # Manga record cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,)) row = cur.fetchone() if row: manga_id, existing_cover = row if cover_url and cover_url != existing_cover: cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id)) conn.commit() else: cur.execute( 'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") ' "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id", (title, description, cover_url, slug, genre), ) manga_id = cur.fetchone()[0] conn.commit() print(f" Created manga (id: {manga_id})") # Chapters chapter_dirs = sorted( [d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")], key=lambda d: parse_chapter_dir(d.name)[0], ) for chapter_dir in chapter_dirs: order_num, chapter_title = parse_chapter_dir(chapter_dir.name) if order_num == 0: continue cur.execute('SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s', (manga_id, order_num)) if cur.fetchone(): print(f" [{order_num}] {chapter_title} — skip") continue page_files = sorted( [f for f in chapter_dir.iterdir() if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")], key=lambda f: int(re.search(r"(\d+)", f.stem).group(1)) if re.search(r"(\d+)", f.stem) else 0, ) if not page_files: continue print(f" [{order_num}] {chapter_title} ({len(page_files)} pages)") # Upload to R2 first def process_page(args, _slug=slug, _order=order_num): j, pf = args r2_key = f"manga/{_slug}/chapters/{_order}/{j}.webp" if not r2_key_exists(r2_key): return j, upload_to_r2(r2_key, convert_to_webp(pf)) return j, f"{PUBLIC_URL}/{r2_key}" page_urls = {} done = 0 with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool: futures = {pool.submit(process_page, (j, f)): j for j, f in enumerate(page_files, 1)} for future in as_completed(futures): j, url = future.result() page_urls[j] = url done += 1 print(f" {done}/{len(page_files)}", end="\r") if not page_urls: print(f" Upload failed, skip") continue # DB records only after R2 upload succeeds cur.execute( 'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id', (manga_id, order_num, chapter_title), ) chapter_id = cur.fetchone()[0] for j in sorted(page_urls): cur.execute('INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)', (chapter_id, j, page_urls[j])) conn.commit() print(f" {len(page_files)} pages uploaded" + " " * 10) # ── Commands ─────────────────────────────────────────────── def cmd_setup(): print("\n Chrome will open. Solve Cloudflare on:") print(" 1. m.happymh.com") print(" 2. Any manga page") print(" 3. Any reader page\n") chrome_proc = launch_chrome(BASE_URL) if not chrome_proc and not is_port_open(CDP_PORT): print(" Failed to launch Chrome") return input(" Press ENTER when done... ") try: with sync_playwright() as p: browser = p.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}") cookies = browser.contexts[0].cookies() cf = [c for c in cookies if c["name"] == "cf_clearance"] print(f" cf_clearance: {'found' if cf else 'NOT found'}") browser.close() except Exception as e: print(f" Could not verify: {e}") if chrome_proc: chrome_proc.terminate() print() def cmd_download(manga_url=None, chapter_set=None): """Download manga. chapter_set is a set of 1-based indices, or None for all.""" urls = [manga_url] if manga_url else load_manga_urls() if not urls: print(" No URLs in manga.json") return print(f"\n Downloading {len(urls)} manga(s)...\n") def run(session): for url in urls: slug = slug_from_url(url) try: result = load_manga_page(session, slug) if not result: continue chapters, metadata, cover_body = result if not chapters: print(" No chapters found.") continue print(f" Found {len(chapters)} chapters") save_manga_local(slug, metadata, cover_body) existing = get_existing_chapters(CONTENT_DIR / slug) for i, ch in enumerate(chapters, 1): if chapter_set and i not in chapter_set: continue if any(ch["chapterName"] in name for name in existing): print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip") continue print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})") download_chapter(session, slug, i, ch, CONTENT_DIR / slug) print(f"\n Done: {slug}") except Exception as e: print(f"\n Error: {url}: {e}") import traceback traceback.print_exc() with_browser(run) print("\nDownload complete!") def cmd_upload(manga_name=None): if manga_name: names = [manga_name] else: names = list_local_manga() if not names: print(" No manga in manga-content/") return print(f"\n Uploading {len(names)} manga(s)...") conn = get_db() try: for name in names: print(f"\n {'='*50}") print(f" {name}") print(f" {'='*50}") upload_manga_to_r2(name, conn) finally: conn.close() print("\nUpload complete!") def cmd_sync(manga_url=None): """Sync: fetch latest chapters, stream directly to R2 (no local save).""" urls = [manga_url] if manga_url else load_manga_urls() if not urls: print(" No URLs in manga.json") return conn = get_db() def run(session): for url in urls: slug = slug_from_url(url) print(f"\n{'='*60}") print(f"Syncing: {slug}") print(f"{'='*60}") # 1. Load manga page + get chapters result = load_manga_page(session, slug) if not result: continue chapters, metadata, cover_body = result if not chapters: print(" No chapters found.") continue print(f" {len(chapters)} chapters on site") # 2. Ensure manga in DB cur = conn.cursor() title = metadata.get("mg-title", slug) genres = metadata.get("mg-genres", []) description = metadata.get("mg-description", "") genre = genres[0] if genres else "Drama" # Cover → R2 (from RAM) cover_url = "" cover_key = f"manga/{slug}/cover.webp" if cover_body and len(cover_body) > 100: if not r2_key_exists(cover_key): cover_webp = make_cover(io.BytesIO(cover_body)) cover_url = upload_to_r2(cover_key, cover_webp) print(f" Cover uploaded to R2") else: cover_url = f"{PUBLIC_URL}/{cover_key}" cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (slug,)) row = cur.fetchone() if row: manga_id = row[0] if cover_url: cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id)) conn.commit() else: cur.execute( 'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") ' "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id", (title, description, cover_url, slug, genre), ) manga_id = cur.fetchone()[0] conn.commit() print(f" Created manga in DB (id: {manga_id})") # 3. Find chapters missing from DB cur.execute('SELECT number FROM "Chapter" WHERE "mangaId" = %s', (manga_id,)) existing_numbers = {row[0] for row in cur.fetchall()} new_count = 0 for i, ch in enumerate(chapters, 1): ch_name = ch["chapterName"] if i in existing_numbers: continue new_count += 1 print(f" [{i}/{len(chapters)}] {ch_name} (id={ch['id']})") # Get image URLs from reader page images = get_chapter_images(session, slug, ch["id"]) if not images: print(f" No images") continue print(f" {len(images)} pages") # Fetch each image into RAM, convert to WebP, upload to R2 page_bytes = {} # page_num -> raw bytes ok = 0 for pn, img in enumerate(images, 1): body = fetch_image_bytes(session, img) if body: page_bytes[pn] = body ok += 1 print(f" Fetched {pn}/{len(images)}", end="\r") else: print(f" {pn}/{len(images)} FAIL") time.sleep(0.1) if not page_bytes: print(f" No images fetched, skip") continue # Upload to R2 first def upload_page(args, _slug=slug, _i=i): pn, raw = args r2_key = f"manga/{_slug}/chapters/{_i}/{pn}.webp" webp = convert_to_webp(io.BytesIO(raw)) return pn, upload_to_r2(r2_key, webp) page_urls = {} done = 0 with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool: futures = {pool.submit(upload_page, (pn, raw)): pn for pn, raw in page_bytes.items()} for future in as_completed(futures): pn, r2_url = future.result() page_urls[pn] = r2_url done += 1 print(f" R2: {done}/{len(page_bytes)}", end="\r") if not page_urls: print(f" R2 upload failed, skip") continue # Only create DB records after R2 upload succeeds cur.execute( 'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id', (manga_id, i, ch_name), ) chapter_id = cur.fetchone()[0] for pn in sorted(page_urls): cur.execute('INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)', (chapter_id, pn, page_urls[pn])) conn.commit() print(f" {len(page_urls)} pages synced" + " " * 20) time.sleep(REQUEST_DELAY) if new_count == 0: print(" Already up to date!") else: print(f" Synced {new_count} new chapters") try: with_browser(run) finally: conn.close() print("\nSync complete!") def r2_list_prefixes(): """List manga slugs in R2 by scanning top-level prefixes under manga/.""" slugs = set() paginator = s3.get_paginator("list_objects_v2") for pg in paginator.paginate(Bucket=BUCKET, Prefix="manga/", Delimiter="/"): for prefix in pg.get("CommonPrefixes", []): # "manga/slug/" -> "slug" slug = prefix["Prefix"].split("/")[1] if slug: slugs.add(slug) return sorted(slugs) def r2_count_by_prefix(prefix): """Count objects under a prefix.""" total = 0 for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix): total += len(pg.get("Contents", [])) return total def r2_delete_prefix(prefix): """Delete all objects under a prefix.""" total = 0 batches = [] for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix): objects = pg.get("Contents", []) if objects: batches.append([{"Key": obj["Key"]} for obj in objects]) def delete_batch(keys): s3.delete_objects(Bucket=BUCKET, Delete={"Objects": keys}) return len(keys) with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool: for count in pool.map(delete_batch, batches): total += count print(f" {total} deleted", end="\r") print(f" {total} objects deleted" + " " * 10) return total # ── TUI ──────────────────────────────────────────────────── def tui_select(title, options, back=True, search=False): """Arrow-key menu. Returns selected index or -1.""" items = list(options) if back: items.append("[Back]") menu = TerminalMenu( items, title=title, search_key="/" if search else None, show_search_hint=search, ) idx = menu.show() if idx is None or (back and idx == len(items) - 1): return -1 return idx _title_cache = {} def get_manga_title(slug): """Read manga title from detail.json or DB, fallback to slug.""" if slug in _title_cache: return _title_cache[slug] # Try local detail.json first detail_path = CONTENT_DIR / slug / "detail.json" if detail_path.exists(): try: detail = json.loads(detail_path.read_text(encoding="utf-8")) title = detail.get("mg-title") if title: _title_cache[slug] = title return title except Exception: pass # Try database (batch load all titles) try: conn = get_db() cur = conn.cursor() cur.execute('SELECT slug, title FROM "Manga"') for row in cur.fetchall(): _title_cache[row[0]] = row[1] conn.close() if slug in _title_cache: return _title_cache[slug] except Exception: pass return slug def manga_display_name(slug): """Format: 'title (slug)' or just 'slug'.""" title = get_manga_title(slug) if title != slug: return f"{title} ({slug})" return slug def tui_pick_manga_url(include_all=True): """Pick manga from manga.json. Shows title + slug.""" urls = load_manga_urls() if not urls: print(" No URLs in manga.json") return None slugs = [slug_from_url(u) for u in urls] items = [] if include_all: items.append("All manga") items += [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)] idx = tui_select("Select manga (/ to search):", items, search=True) if idx < 0: return None if include_all: if idx == 0: return "__all__" return urls[idx - 1] return urls[idx] def tui_pick_local(include_all=True): """Pick from local manga-content/. Shows title + slug.""" local = list_local_manga() if not local: print(" No manga in manga-content/") return None items = [] if include_all: items.append("All manga") items += [f"{i+1}. {manga_display_name(name)}" for i, name in enumerate(local)] idx = tui_select("Select manga (/ to search):", items, search=True) if idx < 0: return None if include_all: if idx == 0: return "__all__" return local[idx - 1] return local[idx] def tui_pick_r2(): """Pick manga from R2. Shows title + slug.""" slugs = r2_list_prefixes() if not slugs: print(" R2 is empty") return None items = [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)] idx = tui_select("Select manga (/ to search):", items, search=True) return slugs[idx] if idx >= 0 else None def tui_pick_chapters(chapters, slug=None): """Multi-select chapter picker. Space to toggle, Enter to confirm. Existing chapters shown grayed out. Returns set of selected 1-based indices, or None for all.""" # Check which chapters already exist locally existing = set() if slug: existing = get_existing_chapters(CONTENT_DIR / slug) # Count existing existing_count = 0 for i, ch in enumerate(chapters, 1): if any(ch["chapterName"] in name for name in existing): existing_count += 1 idx = tui_select(f"{len(chapters)} chapters ({existing_count} downloaded)", [ "All chapters (skip existing)", "Select chapters (space to toggle)", ]) if idx == -1: return "back" if idx == 0: return None # all items = [] for i, ch in enumerate(chapters, 1): done = any(ch["chapterName"] in name for name in existing) label = f"{i}. {ch['chapterName']}" if done: label = f"\033[90m{label} [done]\033[0m" items.append(label) menu = TerminalMenu( items, title="Space=toggle, Enter=confirm, /=search:", multi_select=True, show_multi_select_hint=True, search_key="/", show_search_hint=True, ) selected = menu.show() if selected is None: return "back" if isinstance(selected, int): selected = (selected,) return {i + 1 for i in selected} # 1-based def tui_download(): picked = tui_pick_manga_url() if not picked: return if picked == "__all__": cmd_download() return slug = slug_from_url(picked) print(f"\n Fetching chapters for {slug}...") def get_chapters(session): page = session.page try: page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000) except Exception: pass if not wait_for_cloudflare(session): return None return fetch_chapters_via_api(session.page, slug) chapters = with_browser(get_chapters) if not chapters: print(" Could not get chapters") return result = tui_pick_chapters(chapters, slug=slug) if result == "back": return cmd_download(picked, chapter_set=result) def tui_upload(): picked = tui_pick_local() if not picked: return if picked == "__all__": cmd_upload() else: cmd_upload(picked) def tui_sync(): picked = tui_pick_manga_url() if not picked: return if picked == "__all__": cmd_sync() else: cmd_sync(picked) def tui_r2_manage(): while True: idx = tui_select("R2 / DB Management", [ "Status", "Delete specific manga", "Clear ALL (R2 + DB)", ]) if idx == -1: break elif idx == 0: # Count R2 objects in single pass slug_counts = {} total = 0 for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET): for obj in pg.get("Contents", []): total += 1 parts = obj["Key"].split("/") if len(parts) >= 2 and parts[0] == "manga": slug_counts[parts[1]] = slug_counts.get(parts[1], 0) + 1 print(f"\n R2: {total} objects, {len(slug_counts)} manga") for slug in sorted(slug_counts): print(f" {manga_display_name(slug)}: {slug_counts[slug]} objects") try: conn = get_db() cur = conn.cursor() cur.execute('SELECT COUNT(*) FROM "Manga"') mc = cur.fetchone()[0] cur.execute('SELECT COUNT(*) FROM "Chapter"') cc = cur.fetchone()[0] cur.execute('SELECT COUNT(*) FROM "Page"') pc = cur.fetchone()[0] print(f" DB: {mc} manga, {cc} chapters, {pc} pages") conn.close() except Exception as e: print(f" DB: {e}") input("\n Press ENTER...") elif idx == 1: picked = tui_pick_r2() if not picked: continue confirm = input(f" Delete {picked} from R2 + DB? [y/N] ").strip().lower() if confirm == "y": r2_delete_prefix(f"manga/{picked}/") try: conn = get_db() cur = conn.cursor() cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (picked,)) row = cur.fetchone() if row: mid = row[0] cur.execute('DELETE FROM "Page" WHERE "chapterId" IN (SELECT id FROM "Chapter" WHERE "mangaId" = %s)', (mid,)) cur.execute('DELETE FROM "Chapter" WHERE "mangaId" = %s', (mid,)) cur.execute('DELETE FROM "Manga" WHERE id = %s', (mid,)) conn.commit() print(f" Removed from R2 + DB") conn.close() except Exception as e: print(f" DB error: {e}") elif idx == 2: confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower() if confirm == "y": r2_delete_prefix("") try: conn = get_db() cur = conn.cursor() for t in ['"Page"', '"Chapter"', '"Manga"']: cur.execute(f"DELETE FROM {t}") conn.commit() conn.close() print(" All cleared") except Exception as e: print(f" DB error: {e}") def main(): while True: idx = tui_select("Manga Toolkit", [ "Setup (solve Cloudflare)", "Download", "Upload (local -> R2)", "Sync (site -> R2)", "R2 / DB management", "Quit", ], back=False) if idx is None or idx == -1 or idx == 5: break elif idx == 0: cmd_setup() elif idx == 1: tui_download() elif idx == 2: tui_upload() elif idx == 3: tui_sync() elif idx == 4: tui_r2_manage() print("Bye!") if __name__ == "__main__": main()