""" Manga toolkit — download from m.happymh.com, upload to Cloudflare R2. Usage: python manga.py """ import io import json import os import platform import re import select import sys import time import socket import subprocess import threading IS_MACOS = platform.system() == "Darwin" # POSIX-only TTY modules; EscListener is a no-op on Windows. try: import termios import tty _HAS_TERMIOS = True except ImportError: termios = None tty = None _HAS_TERMIOS = False from concurrent.futures import ThreadPoolExecutor, as_completed from pathlib import Path from urllib.parse import urlparse import boto3 import psycopg2 from PIL import Image from dotenv import load_dotenv from playwright.sync_api import sync_playwright from simple_term_menu import TerminalMenu load_dotenv() # ── Config ───────────────────────────────────────────────── BASE_URL = "https://m.happymh.com" ROOT_DIR = Path(__file__).parent CONTENT_DIR = ROOT_DIR / "manga-content" MANGA_JSON = ROOT_DIR / "manga.json" BROWSER_DATA = ROOT_DIR / ".browser-data" CDP_PORT = 9333 REQUEST_DELAY = 1.5 UPLOAD_WORKERS = 8 CHROME_CANDIDATES = [ "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", # macOS "/usr/bin/google-chrome", # Linux "/usr/bin/google-chrome-stable", "/usr/bin/chromium", "/usr/bin/chromium-browser", r"C:\Program Files\Google\Chrome\Application\chrome.exe", # Windows r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe", ] def _find_chrome(): for p in CHROME_CANDIDATES: if Path(p).exists(): return p return None CHROME_PATH = _find_chrome() # R2/DB config loaded lazily so missing .env gives a friendly error, not KeyError on import. 
_REQUIRED_ENV = ("R2_ACCOUNT_ID", "R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_BUCKET", "R2_PUBLIC_URL", "DATABASE_URL") s3 = None BUCKET = None PUBLIC_URL = None DATABASE_URL = None _config_loaded = False def _ensure_config(): global s3, BUCKET, PUBLIC_URL, DATABASE_URL, _config_loaded if _config_loaded: return missing = [k for k in _REQUIRED_ENV if not os.environ.get(k)] if missing: print("Missing env vars (check .env):") for k in missing: print(f" {k}") sys.exit(1) s3 = boto3.client( "s3", endpoint_url=f"https://{os.environ['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com", aws_access_key_id=os.environ["R2_ACCESS_KEY"], aws_secret_access_key=os.environ["R2_SECRET_KEY"], region_name="auto", ) BUCKET = os.environ["R2_BUCKET"] PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/") DATABASE_URL = os.environ["DATABASE_URL"] _config_loaded = True # ── ESC listener ─────────────────────────────────────────── class EscListener: """Context manager: listens for ESC key in background, sets self.stop event.""" def __init__(self): self.stop = threading.Event() self._thread = None self._old = None self._fd = None def __enter__(self): if not _HAS_TERMIOS or not sys.stdin.isatty(): return self self._fd = sys.stdin.fileno() try: self._old = termios.tcgetattr(self._fd) tty.setcbreak(self._fd) except Exception: self._old = None return self self._thread = threading.Thread(target=self._listen, daemon=True) self._thread.start() return self def _listen(self): while not self.stop.is_set(): try: r, _, _ = select.select([sys.stdin], [], [], 0.2) if r and sys.stdin.read(1) == "\x1b": self.stop.set() print("\n ESC pressed — stopping after current item...") return except Exception: return def __exit__(self, *args): self.stop.set() if self._old is not None: try: termios.tcsetattr(self._fd, termios.TCSADRAIN, self._old) except Exception: pass # ── Chrome management ────────────────────────────────────── def hide_chrome(): """Hide Chrome window (macOS only; no-op elsewhere).""" if not IS_MACOS: return try: 
subprocess.Popen( ["osascript", "-e", 'tell application "System Events" to set visible of process "Google Chrome" to false'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) except Exception: pass def is_port_open(port): with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: return s.connect_ex(("localhost", port)) == 0 def launch_chrome(start_url=None): if is_port_open(CDP_PORT): return None if not CHROME_PATH or not Path(CHROME_PATH).exists(): print(" Chrome not found. Install Google Chrome or Chromium.") print(" Searched:") for p in CHROME_CANDIDATES: print(f" {p}") return None cmd = [ CHROME_PATH, f"--remote-debugging-port={CDP_PORT}", f"--user-data-dir={BROWSER_DATA}", "--no-first-run", "--no-default-browser-check", "--window-position=0,0", "--window-size=800,600", "--no-focus-on-navigate", ] if start_url: cmd.append(start_url) proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) for _ in range(30): if is_port_open(CDP_PORT): time.sleep(1) hide_chrome() return proc time.sleep(0.5) print(" Chrome failed to start") return None class BrowserSession: """Manages Chrome + CDP lifecycle.""" def __init__(self): self.chrome_proc = None self.playwright = None self.browser = None self.page = None def start(self): self.chrome_proc = launch_chrome() self.playwright = sync_playwright().start() self.browser = self.playwright.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}") context = self.browser.contexts[0] self.page = context.pages[0] if context.pages else context.new_page() def close(self): try: self.browser.close() except Exception: pass if self.chrome_proc: self.chrome_proc.terminate() if self.playwright: self.playwright.stop() _session_singleton = None def get_session(): """Get or lazy-start the global Chrome session.""" global _session_singleton if _session_singleton is None: _session_singleton = BrowserSession() _session_singleton.start() return _session_singleton def close_session(): """Close the global Chrome 
session (called on exit).""" global _session_singleton if _session_singleton is not None: _session_singleton.close() _session_singleton = None def with_browser(func): """Run func(session) using the persistent Chrome session. If the session crashed (target closed etc.), reset and retry once.""" session = get_session() try: return func(session) except Exception as e: msg = str(e).lower() if "target" in msg or "browser" in msg or "closed" in msg or "disconnected" in msg: print(" Browser session lost, restarting...") close_session() return func(get_session()) raise # ── Cloudflare ───────────────────────────────────────────── def _wait_for_cf_on_page(page, timeout=120): """Wait for CF to resolve on a specific page.""" for i in range(timeout): try: title = page.title() except Exception: time.sleep(1) continue if "Just a moment" in title or "challenge" in page.url: time.sleep(1) continue if title and ("嗨皮漫画" in title or "happymh" in page.url): return True time.sleep(1) return False def wait_for_cloudflare(session, timeout=120): """Wait for CF to resolve. User solves in the visible browser window.""" page = session.page for i in range(timeout): try: title = page.title() except Exception: time.sleep(1) continue if "Just a moment" in title or "challenge" in page.url: if i == 0: print(" CF challenge — solve in browser...") elif i % 15 == 0: print(f" Still waiting for CF... 
({i}s)") time.sleep(1) continue if title and ("嗨皮漫画" in title or "happymh" in page.url): return True time.sleep(1) print(" CF timed out.") return False # ── Happymh: chapter fetching ───────────────────────────── def fetch_chapters_via_api(page, slug): result = page.evaluate(""" async (slug) => { const all = []; let total = 0; for (let p = 1; p <= 30; p++) { const url = `/v2.0/apis/manga/chapterByPage?code=${slug}&lang=cn&order=asc&page=${p}&_t=${Date.now()}`; try { const ctrl = new AbortController(); setTimeout(() => ctrl.abort(), 10000); const r = await fetch(url, { signal: ctrl.signal }); if (!r.ok) { if (p === 1) return { error: r.status }; break; } const json = await r.json(); if (!json.data) break; total = json.data.total || total; let items = null; for (const val of Object.values(json.data)) { if (Array.isArray(val) && val.length > 0) { items = val; break; } } if (!items || items.length === 0) break; for (const ch of items) { all.push({ id: String(ch.id || ''), chapterName: ch.chapterName || ch.name || '' }); } if (total && all.length >= total) break; } catch (e) { if (p === 1) return { error: e.message }; break; } } return { chapters: all, total }; } """, slug) if result and result.get("chapters") and len(result["chapters"]) > 0: chapters = result["chapters"] total = result.get("total", len(chapters)) print(f" API: {len(chapters)}/{total} chapters") return chapters if result and result.get("error"): print(f" API error: {result['error']}") return None def fetch_chapters_from_dom(page): try: page.wait_for_selector("a[href*='/mangaread/']", timeout=15000) page.wait_for_timeout(1000) except Exception: return None for selector in ["text=展开全部", "text=查看全部", "text=全部章节", "text=展开更多", "text=更多"]: try: btn = page.query_selector(selector) if btn and btn.is_visible(): btn.click() page.wait_for_timeout(2000) break except Exception: continue try: page.wait_for_selector(".MuiDrawer-paper", timeout=5000) except Exception: pass try: sort_btn = 
page.query_selector("text=点我改变排序") if sort_btn and sort_btn.is_visible(): sort_btn.click() page.wait_for_timeout(2000) except Exception: pass total = page.evaluate(""" () => { const spans = document.querySelectorAll('.MuiDrawer-paper span'); for (const s of spans) { const m = s.textContent.match(/共(\\d+)个章节/); if (m) return parseInt(m[1]); } return 0; } """) for _ in range(50): count = page.evaluate("document.querySelectorAll('.MuiDrawer-paper a[href*=\"/mangaread/\"]').length") if total and count >= total: break clicked = page.evaluate(""" () => { const walker = document.createTreeWalker( document.querySelector('.MuiDrawer-paper') || document.body, NodeFilter.SHOW_TEXT ); while (walker.nextNode()) { if (walker.currentNode.textContent.includes('加载更多')) { let el = walker.currentNode.parentElement; while (el && el.tagName !== 'LI') el = el.parentElement; if (el) { el.click(); return true; } walker.currentNode.parentElement.click(); return true; } } return false; } """) if not clicked: break page.wait_for_timeout(1000) chapters = page.evaluate(""" () => { const container = document.querySelector('.MuiDrawer-paper') || document; const links = container.querySelectorAll('a[href*="/mangaread/"]'); const chapters = [], seen = new Set(); links.forEach(a => { const match = a.getAttribute('href').match(/\\/mangaread\\/[^/]+\\/(\\d+)/); if (match && !seen.has(match[1])) { seen.add(match[1]); const name = a.textContent.trim(); if (name && name !== '开始阅读') chapters.push({ id: match[1], chapterName: name }); } }); return chapters; } """) try: page.keyboard.press("Escape") except Exception: pass return chapters if chapters else None # ── Happymh: metadata & cover ───────────────────────────── def fetch_metadata(page): html_text = page.content() metadata = {"mg-url": page.url} m = re.search(r'

(.*?)

', html_text) if m: metadata["mg-title"] = m.group(1).strip() m = re.search(r'

.*?]*>(.*?)', html_text, re.DOTALL) if m: metadata["mg-author"] = m.group(1).strip() genre_matches = re.findall(r'

.*?

', html_text, re.DOTALL) if genre_matches: metadata["mg-genres"] = re.findall(r']*>(.*?)', genre_matches[0]) m = re.search(r'
.*?]*>(.*?)

', html_text, re.DOTALL) if m: metadata["mg-description"] = m.group(1).strip() if not metadata.get("mg-description"): m = re.search(r']*>(.*?)', html_text, re.DOTALL) if m: desc = re.sub(r'<[^>]+>', '', m.group(1)).strip() if desc: metadata["mg-description"] = desc cover_url = page.evaluate(""" () => { const og = document.querySelector('meta[property="og:image"]'); if (og) return og.content; for (const sel of ['img.mg-cover', 'img[src*="mcover"]']) { const img = document.querySelector(sel); if (img && img.src) return img.src; } return null; } """) if cover_url: metadata["mg-cover"] = cover_url return metadata # ── Happymh: image download ─────────────────────────────── def _try_get_chapter_images(page, slug, chapter_id): """Single attempt to get chapter images. Returns (images, api_status).""" captured_images = [] api_info = {"found": False, "status": None, "error": None} def on_response(response): if "/apis/manga/reading" not in response.url: return # Only capture our chapter, skip prefetched ones if f"cid={chapter_id}" not in response.url and f"cid%3D{chapter_id}" not in response.url: return # Ignore if we already captured images (prevent duplicate/prefetch) if captured_images: return api_info["found"] = True api_info["status"] = response.status if response.status != 200: api_info["error"] = f"status {response.status}" return try: data = response.json() # Verify chapter ID in response body resp_cid = str(data.get("data", {}).get("id", "")) if resp_cid and resp_cid != str(chapter_id): return scans = data.get("data", {}).get("scans", []) if isinstance(scans, str): scans = json.loads(scans) for scan in scans: if isinstance(scan, dict) and "url" in scan: captured_images.append({ "url": scan["url"], "no_referrer": scan.get("r", 0) != 0, }) except Exception as e: api_info["error"] = str(e) page.on("response", on_response) reader_url = f"{BASE_URL}/mangaread/{slug}/{chapter_id}" try: page.evaluate(f"window.location.href = '{reader_url}'") except Exception: pass 
hide_chrome() time.sleep(2) try: page.evaluate("window.close = () => {}") except Exception: pass if not _wait_for_cf_on_page(page, timeout=90): try: page.remove_listener("response", on_response) except Exception: pass return [], api_info deadline = time.time() + 20 while time.time() < deadline: if captured_images: break try: page.wait_for_timeout(500) except Exception: break try: page.remove_listener("response", on_response) except Exception: pass if not api_info["found"]: print(" API not intercepted") elif api_info["error"]: print(f" API: {api_info['error']}") # Filter out next-chapter preview images by counting DOM containers if captured_images: try: counts = page.evaluate(""" () => { const all = document.querySelectorAll('[class*="imgContainer"]').length; const next = document.querySelectorAll('[class*="imgNext"]').length; return { all, next, current: all - next }; } """) if counts and counts.get("next", 0) > 0: actual = counts["current"] if 0 < actual < len(captured_images): captured_images = captured_images[:actual] except Exception: pass # DOM fallback if not captured_images: try: page.wait_for_timeout(3000) dom_images = page.evaluate(""" () => { const imgs = document.querySelectorAll('img[src*="http"]'); const nextImgs = new Set( Array.from(document.querySelectorAll('[class*="imgNext"] img')) .map(img => img.src) ); const urls = [], seen = new Set(); imgs.forEach(img => { const src = img.src || ''; if (src && !seen.has(src) && !nextImgs.has(src) && !src.includes('/mcover/') && !src.includes('cloudflare') && !src.includes('.svg')) { seen.add(src); urls.push(src); } }); return urls; } """) if dom_images: print(f" DOM: {len(dom_images)} images") for u in dom_images: captured_images.append({"url": u, "no_referrer": False}) except Exception as e: print(f" DOM failed: {e}") return captured_images, api_info def get_chapter_images(page, slug, chapter_id): """Get chapter images using given page. 
On API 403, returns empty (caller should handle CF).""" images, api_info = _try_get_chapter_images(page, slug, chapter_id) return images, api_info def fetch_all_pages(page, images, max_attempts=3): """Fetch all pages with retry using given page. Returns {page_num: bytes}.""" total = len(images) page_bytes = {} pending = list(enumerate(images, 1)) for attempt in range(1, max_attempts + 1): if not pending: break if attempt > 1: time.sleep(2) next_pending = [] for pn, img in pending: body = fetch_image_bytes(page, img) if body: page_bytes[pn] = body else: next_pending.append((pn, img)) time.sleep(0.1) pending = next_pending return page_bytes def _fetch_via_page(page, url, ref_policy): try: with page.expect_response(lambda r: url.split("?")[0] in r.url, timeout=15000) as resp_info: page.evaluate("([u, r]) => fetch(u, { referrerPolicy: r })", [url, ref_policy]) response = resp_info.value if response.status == 200: body = response.body() if body and len(body) > 100: return body except Exception: pass return None def fetch_image_bytes(page, img): """Fetch image via browser network stack using given page. Tries URL variants on failure.""" url = img["url"] ref_policy = "no-referrer" if img.get("no_referrer") else "origin" # Try original URL body = _fetch_via_page(page, url, ref_policy) if body: return body # Fallback: strip query string (e.g., ?q=50) if "?" 
in url: body = _fetch_via_page(page, url.split("?")[0], ref_policy) if body: return body return None def download_image(page, img, save_path): """Fetch image and save to disk.""" if save_path.exists(): return True body = fetch_image_bytes(page, img) if body: save_path.parent.mkdir(parents=True, exist_ok=True) save_path.write_bytes(body) return True return False # ── R2 / Upload ──────────────────────────────────────────── WEBP_QUALITY = 75 def _to_webp_bytes(img, quality=WEBP_QUALITY, method=6): buf = io.BytesIO() img.save(buf, format="WEBP", quality=quality, method=method) return buf.getvalue() def convert_to_webp(source, quality=WEBP_QUALITY): return _to_webp_bytes(Image.open(source), quality) def probe_and_webp(source, quality=WEBP_QUALITY): """Open once; return (width, height, webp_bytes).""" with Image.open(source) as img: return img.width, img.height, _to_webp_bytes(img, quality) def insert_pages(cur, chapter_id, page_urls): """page_urls: {page_num: (url, width, height)}. Inserts in page_num order.""" for pn in sorted(page_urls): url, w, h = page_urls[pn] cur.execute( 'INSERT INTO "Page" ("chapterId", number, "imageUrl", width, height) VALUES (%s, %s, %s, %s, %s)', (chapter_id, pn, url, w, h), ) def make_cover(source, width=400, height=560): img = Image.open(source) target_ratio = width / height img_ratio = img.width / img.height if img_ratio > target_ratio: new_width = int(img.height * target_ratio) left = (img.width - new_width) // 2 img = img.crop((left, 0, left + new_width, img.height)) else: new_height = int(img.width / target_ratio) img = img.crop((0, 0, img.width, new_height)) img = img.resize((width, height), Image.LANCZOS) return _to_webp_bytes(img, quality=80) def upload_to_r2(key, data, content_type="image/webp"): _ensure_config() s3.put_object(Bucket=BUCKET, Key=key, Body=data, ContentType=content_type) return f"{PUBLIC_URL}/{key}" def r2_key_exists(key): _ensure_config() try: s3.head_object(Bucket=BUCKET, Key=key) return True except 
s3.exceptions.ClientError: return False def get_db(): _ensure_config() conn = psycopg2.connect(DATABASE_URL) conn.set_client_encoding("UTF8") return conn def parse_chapter_dir(dir_name): m = re.match(r"^(\d+)\s+(.+)$", dir_name) if m: return int(m.group(1)), m.group(2) return 0, dir_name # ── Helpers ──────────────────────────────────────────────── def load_manga_urls(): if not MANGA_JSON.exists(): return [] data = json.loads(MANGA_JSON.read_text(encoding="utf-8")) return data if isinstance(data, list) else [] def slug_from_url(url): return urlparse(url).path.strip("/").split("/")[-1] def get_existing_chapters(manga_dir): existing = set() if manga_dir.exists(): for entry in manga_dir.iterdir(): if entry.is_dir() and any(entry.glob("*.jpg")): existing.add(entry.name) return existing def list_local_manga(): if not CONTENT_DIR.exists(): return [] return sorted(d.name for d in CONTENT_DIR.iterdir() if d.is_dir() and not d.name.startswith(".")) # ── Core: download manga ────────────────────────────────── def load_manga_page(session, slug): """Navigate to manga page, pass CF, return (chapters, metadata, cover_bytes) or None.""" cover_responses = {} def on_cover(response): if "/mcover/" in response.url and response.status == 200: try: cover_responses[response.url] = response.body() except Exception: pass page = session.page page.on("response", on_cover) print(" Loading manga page...") try: page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000) except Exception: pass hide_chrome() if not wait_for_cloudflare(session): page = session.page try: page.remove_listener("response", on_cover) except Exception: pass return None page = session.page # may have changed after CF restart print(" Fetching chapters...") chapters = fetch_chapters_via_api(page, slug) if not chapters: print(" API failed, trying DOM...") chapters = fetch_chapters_from_dom(page) metadata = fetch_metadata(page) # Wait for cover image to be present in DOM (up to 8s) cover_url = None for _ in 
range(16): cover_url = page.evaluate(""" () => { const sels = ['img.mip-fill-content[src*="mcover"]', 'img[src*="/mcover/"]']; for (const s of sels) { const img = document.querySelector(s); if (img && img.src) return img.src; } return null; } """) if cover_url: break page.wait_for_timeout(500) # Give the response another moment to be captured if cover_url and cover_url not in cover_responses: page.wait_for_timeout(1500) try: page.remove_listener("response", on_cover) except Exception: pass cover_body = None if cover_url: cover_body = cover_responses.get(cover_url) if not cover_body: for url, data in cover_responses.items(): if cover_url.split("?")[0] in url or url.split("?")[0] in cover_url: cover_body = data break if not cover_body: if cover_url: print(f" Cover URL found but body not captured ({len(cover_responses)} responses)") else: print(f" No cover URL found in DOM") return chapters, metadata, cover_body def save_manga_local(slug, metadata, cover_body): """Save metadata and cover to local manga-content/.""" manga_dir = CONTENT_DIR / slug manga_dir.mkdir(parents=True, exist_ok=True) detail_path = manga_dir / "detail.json" if metadata: existing = {} if detail_path.exists(): try: existing = json.loads(detail_path.read_text(encoding="utf-8")) except json.JSONDecodeError: pass existing.update(metadata) detail_path.write_text(json.dumps(existing, ensure_ascii=False, indent=4), encoding="utf-8") cover_path = manga_dir / "cover.jpg" if not cover_path.exists() and cover_body and len(cover_body) > 100: cover_path.write_bytes(cover_body) print(f" Cover saved ({len(cover_body)} bytes)") def download_chapter(session, slug, chapter_index, chapter, manga_dir): """Download a single chapter's images. 
Returns True if successful.""" ch_id = chapter["id"] ch_name = chapter["chapterName"] folder_name = f"{chapter_index} {ch_name}" chapter_dir = manga_dir / folder_name images, _ = get_chapter_images(session.page, slug, ch_id) if not images: print(f" No images") return False print(f" {len(images)} pages") chapter_dir.mkdir(parents=True, exist_ok=True) page_bytes = fetch_all_pages(session.page, images) ok = 0 for pn, body in page_bytes.items(): save_path = chapter_dir / f"{pn}.jpg" save_path.write_bytes(body) ok += 1 print(f" {ok}/{len(images)} downloaded" + " " * 20) if ok < len(images): try: chapter_dir.rmdir() except Exception: pass return False time.sleep(REQUEST_DELAY) return True # ── Core: upload manga ──────────────────────────────────── def upload_manga_to_r2(manga_name, conn): """Upload a local manga to R2 and create DB records.""" manga_path = CONTENT_DIR / manga_name detail_path = manga_path / "detail.json" if not detail_path.exists(): print(f" Skipping {manga_name}: no detail.json") return detail = json.loads(detail_path.read_text(encoding="utf-8")) title = detail.get("mg-title", manga_name) slug = manga_name genres = detail.get("mg-genres", []) description = detail.get("mg-description", "") genre = ", ".join(genres) if genres else "Drama" cur = conn.cursor() # Cover cover_file = manga_path / "cover.jpg" cover_url = "" cover_key = f"manga/{slug}/cover.webp" if cover_file.exists(): if not r2_key_exists(cover_key): cover_url = upload_to_r2(cover_key, make_cover(cover_file)) print(f" Cover uploaded") else: cover_url = f"{PUBLIC_URL}/{cover_key}" # Manga record cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,)) row = cur.fetchone() if row: manga_id, existing_cover = row if cover_url and cover_url != existing_cover: cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id)) conn.commit() else: cur.execute( 'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, 
"createdAt", "updatedAt") ' "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id", (title, description, cover_url, slug, genre), ) manga_id = cur.fetchone()[0] conn.commit() print(f" Created manga (id: {manga_id})") # Chapters chapter_dirs = sorted( [d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")], key=lambda d: parse_chapter_dir(d.name)[0], ) for chapter_dir in chapter_dirs: order_num, chapter_title = parse_chapter_dir(chapter_dir.name) if order_num == 0: continue cur.execute('SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s', (manga_id, order_num)) if cur.fetchone(): print(f" [{order_num}] {chapter_title} — skip") continue page_files = sorted( [f for f in chapter_dir.iterdir() if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")], key=lambda f: int(re.search(r"(\d+)", f.stem).group(1)) if re.search(r"(\d+)", f.stem) else 0, ) if not page_files: continue print(f" [{order_num}] {chapter_title} ({len(page_files)} pages)") # Upload to R2 first def process_page(args, _slug=slug, _order=order_num): j, pf = args r2_key = f"manga/{_slug}/chapters/{_order}/{j}.webp" if r2_key_exists(r2_key): with Image.open(pf) as img: return j, f"{PUBLIC_URL}/{r2_key}", img.width, img.height w, h, webp = probe_and_webp(pf) return j, upload_to_r2(r2_key, webp), w, h page_urls = {} done = 0 with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool: futures = {pool.submit(process_page, (j, f)): j for j, f in enumerate(page_files, 1)} for future in as_completed(futures): j, url, w, h = future.result() page_urls[j] = (url, w, h) done += 1 print(f" {done}/{len(page_files)}", end="\r") if not page_urls: print(f" Upload failed, skip") continue # DB records only after R2 upload succeeds cur.execute( 'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id', (manga_id, order_num, chapter_title), ) chapter_id = cur.fetchone()[0] insert_pages(cur, chapter_id, page_urls) conn.commit() print(f" 
{len(page_files)} pages uploaded" + " " * 10) # ── Commands ─────────────────────────────────────────────── def cmd_setup(): print("\n Chrome will open. Solve Cloudflare on:") print(" 1. m.happymh.com") print(" 2. Any manga page") print(" 3. Any reader page\n") session = get_session() try: session.page.goto(BASE_URL, wait_until="commit", timeout=60000) except Exception: pass # Bring Chrome to front for setup try: subprocess.Popen( ["osascript", "-e", 'tell application "Google Chrome" to activate'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, ) except Exception: pass input(" Press ENTER when done... ") cookies = session.browser.contexts[0].cookies() cf = [c for c in cookies if c["name"] == "cf_clearance"] print(f" cf_clearance: {'found' if cf else 'NOT found'}") hide_chrome() print() def cmd_download(manga_url=None, chapter_set=None): """Download manga. chapter_set is a set of 1-based indices, or None for all.""" urls = [manga_url] if manga_url else load_manga_urls() if not urls: print(" No URLs in manga.json") return print(f"\n Downloading {len(urls)} manga(s)... 
(ESC to stop)\n") def run(session): with EscListener() as esc: for url in urls: if esc.stop.is_set(): break slug = slug_from_url(url) try: result = load_manga_page(session, slug) if not result: continue chapters, metadata, cover_body = result if not chapters: print(" No chapters found.") continue print(f" Found {len(chapters)} chapters") save_manga_local(slug, metadata, cover_body) existing = get_existing_chapters(CONTENT_DIR / slug) for i, ch in enumerate(chapters, 1): if esc.stop.is_set(): break if chapter_set and i not in chapter_set: continue if any(ch["chapterName"] in name for name in existing): print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip") continue print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})") download_chapter(session, slug, i, ch, CONTENT_DIR / slug) print(f"\n Done: {slug}") except Exception as e: print(f"\n Error: {url}: {e}") import traceback traceback.print_exc() with_browser(run) print("\nDownload complete!") def cmd_upload(manga_name=None): if manga_name: names = [manga_name] else: names = list_local_manga() if not names: print(" No manga in manga-content/") return print(f"\n Uploading {len(names)} manga(s)... (ESC to stop)") conn = get_db() try: with EscListener() as esc: for name in names: if esc.stop.is_set(): break print(f"\n {'='*50}") print(f" {name}") print(f" {'='*50}") upload_manga_to_r2(name, conn) finally: conn.close() print("\nUpload complete!") def cmd_sync(manga_url=None): """Sync: fetch latest chapters, stream directly to R2 (no local save).""" urls = [manga_url] if manga_url else load_manga_urls() if not urls: print(" No URLs in manga.json") return conn = get_db() def run(session): with EscListener() as esc: for url in urls: if esc.stop.is_set(): break slug = slug_from_url(url) print(f"\n{'='*60}") print(f"Syncing: {slug}") print(f"{'='*60}") # 1. 
Load manga page + get chapters result = load_manga_page(session, slug) if not result: continue chapters, metadata, cover_body = result if not chapters: print(" No chapters found.") continue print(f" {len(chapters)} chapters on site") # 2. Ensure manga in DB cur = conn.cursor() title = metadata.get("mg-title", slug) genres = metadata.get("mg-genres", []) description = metadata.get("mg-description", "") genre = ", ".join(genres) if genres else "Drama" # Cover → R2 (from RAM) cover_url = "" cover_key = f"manga/{slug}/cover.webp" if cover_body and len(cover_body) > 100: if not r2_key_exists(cover_key): cover_webp = make_cover(io.BytesIO(cover_body)) cover_url = upload_to_r2(cover_key, cover_webp) print(f" Cover uploaded to R2") else: cover_url = f"{PUBLIC_URL}/{cover_key}" cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (slug,)) row = cur.fetchone() if row: manga_id = row[0] # Refresh metadata fields (cover only updated if we have a new one) if cover_url: cur.execute( 'UPDATE "Manga" SET title = %s, description = %s, genre = %s, ' '"coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (title, description, genre, cover_url, manga_id), ) else: cur.execute( 'UPDATE "Manga" SET title = %s, description = %s, genre = %s, ' '"updatedAt" = NOW() WHERE id = %s', (title, description, genre, manga_id), ) conn.commit() print(f" Updated metadata (genre: {genre})") else: cur.execute( 'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") ' "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id", (title, description, cover_url, slug, genre), ) manga_id = cur.fetchone()[0] conn.commit() print(f" Created manga in DB (id: {manga_id})") # 3. Find chapters missing from DB cur.execute('SELECT number FROM "Chapter" WHERE "mangaId" = %s', (manga_id,)) existing_numbers = {row[0] for row in cur.fetchall()} # 3. 
def r2_list_prefixes():
    """List manga slugs in R2 by scanning top-level prefixes under manga/.

    Returns:
        Alphabetically sorted list of slug strings.
    """
    _ensure_config()
    slugs = set()
    paginator = s3.get_paginator("list_objects_v2")
    for pg in paginator.paginate(Bucket=BUCKET, Prefix="manga/", Delimiter="/"):
        for prefix in pg.get("CommonPrefixes", []):
            # "manga/slug/" -> "slug"
            slug = prefix["Prefix"].split("/")[1]
            if slug:
                slugs.add(slug)
    return sorted(slugs)


def r2_count_by_prefix(prefix):
    """Count objects under a prefix."""
    _ensure_config()
    pages = s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix)
    return sum(len(pg.get("Contents", [])) for pg in pages)


def r2_delete_prefix(prefix):
    """Delete all objects under a prefix.

    Keys are collected first, then removed in parallel, one ``delete_objects``
    call per listing page.

    Returns:
        Number of objects actually deleted. Keys that the service reports in
        the response's ``Errors`` list are not counted and are reported on a
        separate line.
    """
    _ensure_config()
    pages = s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix)
    # One batch per listing page; pages without "Contents" are skipped.
    batches = [
        [{"Key": obj["Key"]} for obj in pg["Contents"]]
        for pg in pages
        if pg.get("Contents")
    ]

    def delete_batch(keys):
        resp = s3.delete_objects(Bucket=BUCKET, Delete={"Objects": keys})
        # A successful HTTP call can still carry per-key failures.
        errors = resp.get("Errors") or []
        return len(keys) - len(errors), len(errors)

    total = 0
    failed = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for deleted, errored in pool.map(delete_batch, batches):
            total += deleted
            failed += errored
            print(f" {total} deleted", end="\r")
    print(f" {total} objects deleted" + " " * 10)
    if failed:
        print(f" {failed} objects could not be deleted")
    return total


def r2_recompress(slug, quality=65):
    """Download all webp images for a manga, re-encode at lower quality, re-upload.

    An object is only overwritten when the re-encoded bytes are smaller than
    the stored ones. Per-object failures are counted and reported, not raised.

    Args:
        slug: Manga slug; objects under ``manga/<slug>/`` are processed.
        quality: Quality value forwarded to ``_to_webp_bytes``.
    """
    _ensure_config()
    prefix = f"manga/{slug}/"
    pages = s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix)
    keys = [
        obj["Key"]
        for pg in pages
        for obj in pg.get("Contents", [])
        if obj["Key"].endswith(".webp")
    ]

    if not keys:
        print(f" No webp files for {slug}")
        return

    print(f" {len(keys)} files to recompress (quality={quality})")

    def recompress_one(key):
        """Return bytes saved for one object, or -1 when it could not be processed."""
        try:
            original = s3.get_object(Bucket=BUCKET, Key=key)["Body"].read()
            # Context manager releases the decoder/buffer once encoding is done.
            with Image.open(io.BytesIO(original)) as img:
                new_data = _to_webp_bytes(img, quality=quality)
            saved = len(original) - len(new_data)
            if saved > 0:
                s3.put_object(Bucket=BUCKET, Key=key, Body=new_data, ContentType="image/webp")
                return saved
            return 0
        except Exception:
            # Deliberate best-effort: one bad object must not abort the batch.
            return -1

    saved_total = 0
    failed = 0
    done = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for saved in pool.map(recompress_one, keys):
            done += 1
            if saved < 0:
                failed += 1
            else:
                saved_total += saved
            print(f" {done}/{len(keys)} — saved {saved_total // 1024} KB", end="\r")

    msg = f" Done: {done}/{len(keys)} processed, {saved_total // (1024 * 1024)} MB saved"
    if failed:
        msg += f" ({failed} failed)"
    print(msg + " " * 10)
# ── TUI ────────────────────────────────────────────────────


def tui_select(title, options, back=True, search=False):
    """Arrow-key menu. Returns selected index or -1.

    Args:
        title: Menu heading.
        options: Iterable of item labels.
        back: When true, a trailing "[Back]" item is appended; choosing it
            returns -1.
        search: When true, "/" starts an in-menu search and a hint is shown.

    Returns:
        Index into ``options``, or -1 when the menu is dismissed or "[Back]"
        is chosen.
    """
    items = list(options)
    if back:
        items.append("[Back]")
    menu = TerminalMenu(
        items,
        title=title,
        search_key="/" if search else None,
        show_search_hint=search,
    )
    idx = menu.show()
    if idx is None or (back and idx == len(items) - 1):
        return -1
    return idx


_title_cache = {}


def get_manga_title(slug):
    """Read manga title from detail.json or DB, fallback to slug.

    Lookup order: in-memory cache, local ``detail.json`` ("mg-title" key),
    then the "Manga" table (all rows are loaded into the cache in one query).
    Every failure along the way is swallowed: this is a display helper and
    must never break a menu.
    """
    if slug in _title_cache:
        return _title_cache[slug]

    # Try local detail.json first
    detail_path = CONTENT_DIR / slug / "detail.json"
    if detail_path.exists():
        try:
            detail = json.loads(detail_path.read_text(encoding="utf-8"))
            title = detail.get("mg-title")
            if title:
                _title_cache[slug] = title
                return title
        except Exception:
            pass

    # Try database (batch load all titles)
    try:
        conn = get_db()
    except Exception:
        return slug
    try:
        cur = conn.cursor()
        cur.execute('SELECT slug, title FROM "Manga"')
        for db_slug, db_title in cur.fetchall():
            # Skip NULL/empty titles so callers never render "None (slug)".
            if db_title:
                _title_cache[db_slug] = db_title
    except Exception:
        pass
    finally:
        # Always release the connection, including when the query fails.
        try:
            conn.close()
        except Exception:
            pass
    return _title_cache.get(slug, slug)


def manga_display_name(slug):
    """Format: 'title (slug)' or just 'slug'."""
    title = get_manga_title(slug)
    if title != slug:
        return f"{title} ({slug})"
    return slug


def tui_pick_manga_url(include_all=True):
    """Pick manga from manga.json. Shows title + slug.

    Returns:
        The chosen URL, the string "__all__" when "All manga" is chosen
        (only offered when ``include_all`` is true), or None when there are
        no URLs or the menu is dismissed.
    """
    urls = load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return None

    slugs = [slug_from_url(u) for u in urls]
    items = ["All manga"] if include_all else []
    items += [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]

    idx = tui_select("Select manga (/ to search):", items, search=True)
    if idx < 0:
        return None
    if not include_all:
        return urls[idx]
    if idx == 0:
        return "__all__"
    # Menu index is shifted by one because of the leading "All manga" entry.
    return urls[idx - 1]
def tui_pick_local(include_all=True):
    """Pick from local manga-content/. Shows title + slug.

    Returns:
        The chosen local manga name, the string "__all__" when "All manga" is
        chosen (only offered when ``include_all`` is true), or None when
        nothing is available or the menu is dismissed.
    """
    local = list_local_manga()
    if not local:
        print(" No manga in manga-content/")
        return None

    items = ["All manga"] if include_all else []
    items += [f"{i+1}. {manga_display_name(name)}" for i, name in enumerate(local)]

    idx = tui_select("Select manga (/ to search):", items, search=True)
    if idx < 0:
        return None
    if not include_all:
        return local[idx]
    if idx == 0:
        return "__all__"
    # Menu index is shifted by one because of the leading "All manga" entry.
    return local[idx - 1]


def tui_pick_r2():
    """Pick manga from R2. Shows title + slug.

    Returns:
        The chosen slug, or None when R2 has no manga or the menu is dismissed.
    """
    slugs = r2_list_prefixes()
    if not slugs:
        print(" R2 is empty")
        return None
    items = [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]
    idx = tui_select("Select manga (/ to search):", items, search=True)
    return slugs[idx] if idx >= 0 else None


def tui_pick_chapters(chapters, slug=None):
    """Multi-select chapter picker. Space to toggle, Enter to confirm.

    Chapters that already exist locally are shown grayed out with a
    "[done]" suffix.

    Args:
        chapters: Sequence of dicts, each with a "chapterName" key.
        slug: Optional manga slug; when given, ``CONTENT_DIR / slug`` is
            checked for chapters that are already downloaded.

    Returns:
        - None when "All chapters" is chosen,
        - a set of selected 1-based indices for an explicit selection,
        - the string "back" when either menu is dismissed.
    """
    # Check which chapters already exist locally
    existing = get_existing_chapters(CONTENT_DIR / slug) if slug else set()

    # Computed once and reused for both the counter and the per-item labels.
    # NOTE(review): this is a substring match, so e.g. "Chapter 1" would also
    # match a local entry named "Chapter 10" — confirm against the naming
    # used by get_existing_chapters.
    downloaded = [
        any(ch["chapterName"] in name for name in existing) for ch in chapters
    ]

    idx = tui_select(f"{len(chapters)} chapters ({sum(downloaded)} downloaded)", [
        "All chapters (skip existing)",
        "Select chapters (space to toggle)",
    ])
    if idx == -1:
        return "back"
    if idx == 0:
        return None  # all

    items = []
    for i, (ch, done) in enumerate(zip(chapters, downloaded), 1):
        label = f"{i}. {ch['chapterName']}"
        if done:
            label = f"\033[90m{label} [done]\033[0m"
        items.append(label)

    menu = TerminalMenu(
        items,
        title="Space=toggle, Enter=confirm, /=search:",
        multi_select=True,
        show_multi_select_hint=True,
        search_key="/",
        show_search_hint=True,
    )
    selected = menu.show()
    if selected is None:
        return "back"
    if isinstance(selected, int):
        selected = (selected,)
    return {i + 1 for i in selected}  # 1-based
def tui_download():
    """Menu flow: pick a manga from manga.json, pick chapters, download them."""
    picked = tui_pick_manga_url()
    if not picked:
        return
    if picked == "__all__":
        cmd_download()
        return

    slug = slug_from_url(picked)
    print(f"\n Fetching chapters for {slug}...")

    def get_chapters(session):
        page = session.page
        try:
            page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
        except Exception:
            # Navigation errors are ignored here; the Cloudflare wait below
            # decides whether the session is usable.
            pass
        if not wait_for_cloudflare(session):
            return None
        return fetch_chapters_via_api(session.page, slug)

    chapters = with_browser(get_chapters)
    if not chapters:
        print(" Could not get chapters")
        return

    result = tui_pick_chapters(chapters, slug=slug)
    if result == "back":
        return
    cmd_download(picked, chapter_set=result)


def tui_upload():
    """Menu flow: pick a local manga (or all) and upload it."""
    picked = tui_pick_local()
    if not picked:
        return
    if picked == "__all__":
        cmd_upload()
    else:
        cmd_upload(picked)


def tui_sync():
    """Menu flow: pick a manga from manga.json (or all) and sync it."""
    picked = tui_pick_manga_url()
    if not picked:
        return
    if picked == "__all__":
        cmd_sync()
    else:
        cmd_sync(picked)


def tui_edit_manga():
    """Edit manga metadata (title, description, genre, status, coverUrl) in DB.

    Edits are held in memory and written with a single UPDATE only when
    "Save & exit" is chosen. The connection is closed on every exit path,
    including errors raised while editing.
    """
    try:
        conn = get_db()
    except Exception as e:
        print(f" DB error: {e}")
        return

    try:
        cur = conn.cursor()
        try:
            cur.execute('SELECT slug, title FROM "Manga" ORDER BY title')
            rows = cur.fetchall()
        except Exception as e:
            print(f" DB error: {e}")
            return

        if not rows:
            print(" No manga in DB")
            return

        items = [f"{i+1}. {title} ({slug})" for i, (slug, title) in enumerate(rows)]
        sel = tui_select("Select manga to edit (/ to search):", items, search=True)
        if sel < 0:
            return

        slug, _ = rows[sel]
        cur.execute(
            'SELECT id, title, description, genre, status, "coverUrl" FROM "Manga" WHERE slug = %s',
            (slug,),
        )
        row = cur.fetchone()
        if not row:
            print(" Not found")
            return

        # Same order as the SELECT column list after "id".
        fields = ("title", "description", "genre", "status", "coverUrl")
        mid, *current = row
        values = dict(zip(fields, current))

        while True:
            description = values["description"] or ""
            preview = description[:80] + ("..." if len(description) > 80 else "")
            print(f"\n Editing: {slug}")
            print(f" title: {values['title']}")
            print(f" description: {preview}")
            print(f" genre: {values['genre']}")
            print(f" status: {values['status']}")
            print(f" coverUrl: {values['coverUrl']}")

            idx = tui_select("Edit field", [*fields, "Save & exit", "Discard & exit"])
            if idx == -1 or idx == 6:
                print(" Discarded.")
                break
            if idx == 5:
                cur.execute(
                    'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
                    'status = %s, "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                    (*(values[name] for name in fields), mid),
                )
                conn.commit()
                print(" Saved.")
                break

            field_name = fields[idx]
            if field_name == "status":
                # Status is picked from a fixed list instead of free text.
                opts = ["PUBLISHED", "DRAFT", "HIDDEN"]
                s_idx = tui_select("Status:", opts)
                if s_idx >= 0:
                    values["status"] = opts[s_idx]
                continue

            print(f" Current: {values[field_name] or ''}")
            new_val = input(f" New {field_name} (empty=keep): ").strip()
            if new_val:
                values[field_name] = new_val
    finally:
        conn.close()
def _pick_manga_and_chapters(conn, prompt="Select chapters", multi=True):
    """Helper: pick manga from DB, then pick chapter(s).

    Args:
        conn: Open DB connection; it is not closed here.
        prompt: Heading text used for the chapter menus.
        multi: When true, offer "All chapters" or a multi-select list;
            otherwise a single-choice list.

    Returns:
        ``(slug, [(ch_id, ch_num, ch_title), ...])``, or None when the DB has
        nothing to offer or any menu is dismissed.
    """
    cur = conn.cursor()
    cur.execute('SELECT id, slug, title FROM "Manga" ORDER BY title')
    mangas = cur.fetchall()
    if not mangas:
        print(" No manga in DB")
        return None

    items = [f"{i+1}. {title} ({slug})" for i, (_, slug, title) in enumerate(mangas)]
    sel = tui_select("Select manga (/ to search):", items, search=True)
    if sel < 0:
        return None
    manga_id, slug, _ = mangas[sel]

    cur.execute(
        'SELECT id, number, title FROM "Chapter" WHERE "mangaId" = %s ORDER BY number',
        (manga_id,),
    )
    chapters = cur.fetchall()
    if not chapters:
        print(" No chapters in DB for this manga")
        return None

    labels = [f"{num}. {title}" for _, num, title in chapters]

    if not multi:
        sel = tui_select(f"{prompt} (/ to search):", labels, search=True)
        if sel < 0:
            return None
        return slug, [chapters[sel]]

    scope = tui_select(f"{prompt}: {len(chapters)} chapters", [
        "All chapters",
        "Select specific chapters",
    ])
    if scope == -1:
        return None
    if scope == 0:
        return slug, list(chapters)

    menu = TerminalMenu(
        labels,
        title="Space=toggle, Enter=confirm, /=search:",
        multi_select=True,
        show_multi_select_hint=True,
        search_key="/",
        show_search_hint=True,
    )
    selected = menu.show()
    # Test for None first: a bare index 0 is falsy and must not be mistaken
    # for a cancelled menu.
    if selected is None:
        return None
    if isinstance(selected, int):
        selected = (selected,)
    if not selected:
        return None
    return slug, [chapters[i] for i in selected]
def tui_delete_chapter():
    """Interactively remove chosen chapter(s): R2 objects first, then DB rows.

    Asks for a y/N confirmation before deleting anything. Each chapter is
    committed on its own.
    """
    try:
        db = get_db()
    except Exception as err:
        print(f" DB error: {err}")
        return

    try:
        choice = _pick_manga_and_chapters(db, "Select chapters to delete")
        if not choice:
            return
        slug, doomed = choice

        answer = input(f" Delete {len(doomed)} chapter(s) from R2 + DB? [y/N] ")
        if answer.strip().lower() != "y":
            print(" Cancelled.")
            return

        cursor = db.cursor()
        # Pages are removed before the chapter row that owns them.
        statements = (
            'DELETE FROM "Page" WHERE "chapterId" = %s',
            'DELETE FROM "Chapter" WHERE id = %s',
        )
        for chapter_id, number, name in doomed:
            print(f" Deleting [{number}] {name}...")
            r2_delete_prefix(f"manga/{slug}/chapters/{number}/")
            for statement in statements:
                cursor.execute(statement, (chapter_id,))
            db.commit()
            print(" Done.")
    finally:
        db.close()
def _reupload_chapter_pages(conn, page, slug, ch_id, ch_num, images):
    """Fetch every page of one chapter, upload to R2 and rewrite its "Page" rows.

    Returns True when the chapter was restored, False when some page could
    not be fetched (in which case nothing is uploaded or written).
    """
    page_bytes = fetch_all_pages(page, images)
    if len(page_bytes) < len(images):
        missing = [pn for pn in range(1, len(images) + 1) if pn not in page_bytes]
        print(f" Could not fetch pages: {missing}")
        for mn in missing:
            print(f" page {mn}: {images[mn-1]['url']}")
        print(" Skipping chapter")
        return False

    def upload_page(item):
        pn, raw = item
        r2_key = f"manga/{slug}/chapters/{ch_num}/{pn}.webp"
        with Image.open(io.BytesIO(raw)) as img:
            w, h = img.width, img.height
        return pn, upload_to_r2(r2_key, convert_to_webp(io.BytesIO(raw))), w, h

    page_urls = {}
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        uploads = pool.map(upload_page, page_bytes.items())
        for done, (pn, r2_url, w, h) in enumerate(uploads, 1):
            page_urls[pn] = (r2_url, w, h)
            print(f" R2: {done}/{len(page_bytes)}", end="\r")

    # Replace the chapter's rows wholesale so page numbers match the upload.
    cur = conn.cursor()
    cur.execute('DELETE FROM "Page" WHERE "chapterId" = %s', (ch_id,))
    for pn in sorted(page_urls):
        url, w, h = page_urls[pn]
        cur.execute(
            'INSERT INTO "Page" ("chapterId", number, "imageUrl", width, height) VALUES (%s, %s, %s, %s, %s)',
            (ch_id, pn, url, w, h),
        )
    conn.commit()
    print(f" {len(page_urls)} pages restored" + " " * 20)
    return True


def _fix_page_dims(conn, slug, ch_id, ch_num):
    """Fill missing/non-positive width/height of a chapter's pages from R2 images.

    Returns ``(updated, candidates)``: rows updated and rows that needed it.
    """
    cur = conn.cursor()
    # Same predicate as the counting query in the caller (<= 0, not = 0).
    cur.execute(
        'SELECT id, number FROM "Page" WHERE "chapterId" = %s '
        'AND (width IS NULL OR width <= 0 OR height IS NULL OR height <= 0) '
        'ORDER BY number',
        (ch_id,),
    )
    pages = cur.fetchall()

    def read_dims(item):
        page_id, pn = item
        r2_key = f"manga/{slug}/chapters/{ch_num}/{pn}.webp"
        try:
            data = s3.get_object(Bucket=BUCKET, Key=r2_key)["Body"].read()
            with Image.open(io.BytesIO(data)) as img:
                return page_id, img.width, img.height
        except Exception:
            # Best-effort: an unreadable object just leaves that row untouched.
            return page_id, None, None

    updated = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for page_id, w, h in pool.map(read_dims, pages):
            if w and h:
                cur.execute(
                    'UPDATE "Page" SET width = %s, height = %s WHERE id = %s',
                    (w, h, page_id),
                )
                updated += 1
    conn.commit()
    return updated, len(pages)


def tui_check_missing_pages():
    """Check selected chapters against the site's actual page count and re-upload if mismatched.

    For each selected chapter:
      * site page count != R2 object count -> re-fetch and re-upload the
        chapter and rewrite its "Page" rows;
      * counts match -> repair missing/non-positive width/height in the DB
        from the images stored in R2, or report a DB row-count mismatch.

    ESC stops after the current chapter. The DB connection is closed on every
    exit path.
    """
    try:
        conn = get_db()
    except Exception as e:
        print(f" DB error: {e}")
        return

    try:
        result = _pick_manga_and_chapters(conn, "Select chapters to check")
        if not result:
            return
        slug, selected_chapters = result

        if slug not in {slug_from_url(u) for u in load_manga_urls()}:
            print(f" {slug} not in manga.json — cannot re-fetch pages")
            return

        # Load reader pages and compare site's actual page count vs R2
        def run(session):
            with EscListener() as esc:
                loaded = load_manga_page(session, slug)
                if not loaded:
                    return
                chapters, _, _ = loaded
                if not chapters:
                    return

                cur = conn.cursor()
                fixed_dims = 0
                reuploaded = 0
                print(f"\n Checking {len(selected_chapters)} chapters...")

                for ch_id, ch_num, ch_title in selected_chapters:
                    if esc.stop.is_set():
                        break

                    # DB chapter numbers are used as 1-based positions in the
                    # site's chapter list; the lower bound keeps 0/negatives
                    # from silently indexing from the end.
                    if not 1 <= ch_num <= len(chapters):
                        print(f" [{ch_num}] {ch_title}: out of range on site")
                        continue

                    ch = chapters[ch_num - 1]
                    images, api_info = get_chapter_images(session.page, slug, ch["id"])
                    if not images:
                        if api_info.get("status") == 403:
                            print(f" [{ch_num}] CF blocked — run Setup")
                            esc.stop.set()
                            break
                        print(f" [{ch_num}] {ch_title}: no images from site")
                        continue

                    site_count = len(images)
                    r2_count = r2_count_by_prefix(f"manga/{slug}/chapters/{ch_num}/")

                    if site_count != r2_count:
                        print(f" [{ch_num}] {ch_title}: site={site_count}, R2={r2_count} — re-uploading...")
                        # Re-upload IMMEDIATELY while browser is on this chapter's reader page
                        # NOTE(review): when R2 holds MORE objects than the site,
                        # the surplus keys are not removed here, so the counts
                        # will still differ on the next check — confirm intent.
                        if _reupload_chapter_pages(conn, session.page, slug, ch_id, ch_num, images):
                            reuploaded += 1
                        continue

                    # Count matches — check if DB has valid width/height for all pages
                    cur.execute(
                        'SELECT COUNT(*), '
                        'COUNT(*) FILTER (WHERE width IS NULL OR width <= 0 '
                        'OR height IS NULL OR height <= 0), '
                        'MIN(width), MAX(width), MIN(height), MAX(height) '
                        'FROM "Page" WHERE "chapterId" = %s',
                        (ch_id,),
                    )
                    db_count, bad_count, min_w, max_w, min_h, max_h = cur.fetchone()

                    if bad_count > 0:
                        print(f" [{ch_num}] {ch_title}: {bad_count} pages need dims — fixing from R2...")
                        updated, candidates = _fix_page_dims(conn, slug, ch_id, ch_num)
                        fixed_dims += 1
                        print(f" {updated}/{candidates} dims updated")
                    elif db_count != site_count:
                        # R2 is complete but the DB rows are not; don't report OK.
                        print(f" [{ch_num}] {ch_title}: R2 has {site_count} pages but DB has {db_count} rows")
                    else:
                        print(f" [{ch_num}] {ch_title}: {site_count} pages OK (w {min_w}-{max_w}, h {min_h}-{max_h})")

                print(f"\n Done: {reuploaded} re-uploaded, {fixed_dims} dim-fixed")

        with_browser(run)
    finally:
        conn.close()
    print("\nCheck complete!")
def _r2_show_status():
    """Print object counts per manga in R2 and row counts in the DB."""
    _ensure_config()
    slug_counts = {}
    total = 0
    for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET):
        for obj in pg.get("Contents", []):
            total += 1
            parts = obj["Key"].split("/")
            if len(parts) >= 2 and parts[0] == "manga":
                slug_counts[parts[1]] = slug_counts.get(parts[1], 0) + 1

    print(f"\n R2: {total} objects, {len(slug_counts)} manga")
    for slug in sorted(slug_counts):
        print(f" {manga_display_name(slug)}: {slug_counts[slug]} objects")

    try:
        conn = get_db()
        try:
            cur = conn.cursor()
            counts = []
            for table in ('"Manga"', '"Chapter"', '"Page"'):
                # Table names come from the fixed tuple above, never from input.
                cur.execute(f"SELECT COUNT(*) FROM {table}")
                counts.append(cur.fetchone()[0])
            mc, cc, pc = counts
            print(f" DB: {mc} manga, {cc} chapters, {pc} pages")
        finally:
            conn.close()
    except Exception as e:
        print(f" DB: {e}")
    input("\n Press ENTER...")


def _r2_delete_manga():
    """Pick one manga from R2 and, after confirmation, remove it from R2 and the DB."""
    picked = tui_pick_r2()
    if not picked:
        return
    confirm = input(f" Delete {picked} from R2 + DB? [y/N] ").strip().lower()
    if confirm != "y":
        return

    r2_delete_prefix(f"manga/{picked}/")
    try:
        conn = get_db()
        try:
            cur = conn.cursor()
            cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (picked,))
            row = cur.fetchone()
            if row:
                mid = row[0]
                # Children first: pages, then chapters, then the manga row.
                cur.execute(
                    'DELETE FROM "Page" WHERE "chapterId" IN '
                    '(SELECT id FROM "Chapter" WHERE "mangaId" = %s)',
                    (mid,),
                )
                cur.execute('DELETE FROM "Chapter" WHERE "mangaId" = %s', (mid,))
                cur.execute('DELETE FROM "Manga" WHERE id = %s', (mid,))
                conn.commit()
            print(" Removed from R2 + DB")
        finally:
            conn.close()
    except Exception as e:
        print(f" DB error: {e}")


def _r2_clear_all():
    """After confirmation, delete every R2 object and every Page/Chapter/Manga row."""
    confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower()
    if confirm != "y":
        return

    r2_delete_prefix("")
    try:
        conn = get_db()
        try:
            cur = conn.cursor()
            for t in ['"Page"', '"Chapter"', '"Manga"']:
                cur.execute(f"DELETE FROM {t}")
            conn.commit()
        finally:
            conn.close()
        print(" All cleared")
    except Exception as e:
        print(f" DB error: {e}")


def _r2_recompress_menu():
    """Pick one manga (or all) in R2 and recompress its images at quality 65."""
    slugs = r2_list_prefixes()
    if not slugs:
        print(" R2 is empty")
        return

    items = ["All manga"] + [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]
    sel = tui_select("Recompress which? (quality=65, overwrites originals)", items, search=True)
    if sel < 0:
        return

    targets = slugs if sel == 0 else [slugs[sel - 1]]
    confirm = input(f" Recompress {len(targets)} manga to quality 65? [y/N] ").strip().lower()
    if confirm != "y":
        return

    for slug in targets:
        print(f"\n {manga_display_name(slug)}")
        r2_recompress(slug, quality=65)


def tui_r2_manage():
    """R2 / DB management submenu; loops until the menu is dismissed."""
    # Position in this tuple == position of the label in the menu below.
    actions = (
        _r2_show_status,
        tui_edit_manga,
        _r2_delete_manga,
        tui_delete_chapter,
        tui_check_missing_pages,
        _r2_clear_all,
        _r2_recompress_menu,
    )
    while True:
        idx = tui_select("R2 / DB Management", [
            "Status",
            "Edit manga info",
            "Delete specific manga",
            "Delete specific chapter",
            "Check missing pages",
            "Clear ALL (R2 + DB)",
            "Recompress manga (quality 65)",
        ])
        if idx == -1:
            break
        actions[idx]()


def main():
    """Top-level menu loop; always closes the browser session on the way out."""
    handlers = {
        0: cmd_setup,
        1: tui_download,
        2: tui_upload,
        3: tui_sync,
        4: tui_r2_manage,
    }
    try:
        while True:
            idx = tui_select("Manga Toolkit", [
                "Setup (solve Cloudflare)",
                "Download",
                "Upload (local -> R2)",
                "Sync (site -> R2)",
                "R2 / DB management",
                "Quit",
            ], back=False)
            handler = handlers.get(idx)
            if handler is None:
                # "Quit" (5), a dismissed menu (-1) or None all end the loop.
                break
            handler()
    finally:
        close_session()
    print("Bye!")


if __name__ == "__main__":
    main()