first commit
This commit is contained in:
commit
721ad213ee
6
.gitignore
vendored
Normal file
6
.gitignore
vendored
Normal file
@ -0,0 +1,6 @@
|
||||
.env
|
||||
__pycache__/
|
||||
manga-content/
|
||||
.browser-data/
|
||||
cookies.txt
|
||||
.DS_Store
|
||||
47
CLAUDE.md
Normal file
47
CLAUDE.md
Normal file
@ -0,0 +1,47 @@
|
||||
# CLAUDE.md
|
||||
|
||||
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
||||
|
||||
## Project Overview
|
||||
|
||||
Manga downloader for m.happymh.com. Reads manga URLs from `manga.json` and downloads chapter images into `manga-content/`.
|
||||
|
||||
## Data Flow
|
||||
|
||||
1. **Input**: `manga.json` — JSON array of manga URLs (e.g., `["https://m.happymh.com/manga/butiange"]`)
|
||||
2. **Output**: `manga-content/<manga-name>/<chapter-number> <chapter-title>/*.jpg` — downloaded page images
|
||||
3. **Metadata**: `manga-content/<manga-name>/detail.json` — stores manga/chapter metadata
|
||||
|
||||
## Directory Convention
|
||||
|
||||
```
|
||||
manga-content/
|
||||
butiange/
|
||||
detail.json
|
||||
1 第一回/
|
||||
1.jpg
|
||||
2.jpg
|
||||
3.jpg
|
||||
2 第二回/
|
||||
...
|
||||
```
|
||||
|
||||
- Manga name is the URL slug (last path segment of the manga URL)
|
||||
- Chapter folders are named `<number> <title>` (e.g., `1 第一回`)
|
||||
- Image filenames are sequential page numbers (`1.jpg`, `2.jpg`, ...)
|
||||
|
||||
## Metadata Format (`detail.json`)
|
||||
|
||||
Each manga folder contains a `detail.json` with fields:
|
||||
- `mg-url` — source URL on m.happymh.com
|
||||
- `mg-title` — manga title (Chinese)
|
||||
- `mg-author` — author name
|
||||
- `mg-genres` — array of genre tags
|
||||
- `mg-description` — synopsis text
|
||||
|
||||
## Target Site
|
||||
|
||||
- Base URL: `https://m.happymh.com`
|
||||
- Manga page: `/manga/<slug>` — contains chapter listing
|
||||
- Chapter page: `/mangaread/<slug>/<chapter-id>` — contains page images
|
||||
- The site is mobile-oriented; requests should use appropriate mobile User-Agent headers
|
||||
686
download.py
Normal file
686
download.py
Normal file
@ -0,0 +1,686 @@
|
||||
"""
|
||||
Manga downloader for m.happymh.com (educational purposes only).
|
||||
|
||||
Launches real Chrome via subprocess (not Playwright), then connects via
|
||||
Chrome DevTools Protocol. Images are downloaded directly via HTTP.
|
||||
|
||||
Usage:
|
||||
python download.py --setup # open Chrome, solve CF manually, exit
|
||||
python download.py # download manga from manga.json
|
||||
"""
|
||||
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
import time
|
||||
import socket
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from playwright.sync_api import sync_playwright
|
||||
|
||||
BASE_URL = "https://m.happymh.com"
|
||||
USER_AGENT = (
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
|
||||
"AppleWebKit/537.36 (KHTML, like Gecko) "
|
||||
"Chrome/143.0.0.0 Safari/537.36"
|
||||
)
|
||||
ROOT_DIR = Path(__file__).parent
|
||||
CONTENT_DIR = ROOT_DIR / "manga-content"
|
||||
MANGA_JSON = ROOT_DIR / "manga.json"
|
||||
BROWSER_DATA = ROOT_DIR / ".browser-data"
|
||||
CDP_PORT = 9333
|
||||
REQUEST_DELAY = 1.5
|
||||
|
||||
CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
|
||||
|
||||
|
||||
def is_port_open(port):
    """Return True when something is accepting TCP connections on localhost:port."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex reports failure via a nonzero errno instead of raising.
        result = sock.connect_ex(("localhost", port))
    finally:
        sock.close()
    return result == 0
|
||||
|
||||
|
||||
def launch_chrome(start_url=None):
    """Launch real Chrome with CDP port.

    Returns the subprocess.Popen handle for the spawned Chrome, or None
    when some Chrome instance is already listening on CDP_PORT (in that
    case the caller must not terminate it). Exits the whole process if
    the Chrome binary is missing or the DevTools port never opens.
    """
    if is_port_open(CDP_PORT):
        print(f"Chrome already on port {CDP_PORT}")
        return None

    # Hard-coded macOS install path; fail fast if absent.
    if not Path(CHROME_PATH).exists():
        print(f"Chrome not found at: {CHROME_PATH}")
        sys.exit(1)

    cmd = [
        CHROME_PATH,
        f"--remote-debugging-port={CDP_PORT}",
        # Dedicated profile dir so CF cookies persist between runs.
        f"--user-data-dir={BROWSER_DATA}",
        "--no-first-run",
        "--no-default-browser-check",
    ]
    if start_url:
        cmd.append(start_url)

    proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)

    # Poll for up to ~15s until the DevTools port accepts connections.
    for _ in range(30):
        if is_port_open(CDP_PORT):
            time.sleep(1)  # brief grace period for Chrome to finish initializing
            return proc
        time.sleep(0.5)

    print("Chrome failed to start")
    sys.exit(1)
|
||||
|
||||
|
||||
def wait_for_cloudflare(page, timeout=120):
    """Wait for CF to resolve. User solves CAPTCHA manually if needed.

    Polls the page roughly once per second for up to *timeout* seconds.
    Returns True once the real site is reached (site title or happymh
    URL), False if the challenge never clears.
    """
    for i in range(timeout):
        try:
            title = page.title()
        except Exception:
            # Page may be mid-navigation; retry shortly.
            time.sleep(1)
            continue

        # Cloudflare interstitial detected — keep waiting while the
        # user (possibly) solves the challenge in the visible browser.
        if "Just a moment" in title or "challenge" in page.url:
            if i == 0:
                print(" CF challenge — solve in browser...")
            elif i % 15 == 0:
                print(f" Still waiting for CF... ({i}s)")
            time.sleep(1)
            continue

        # Success: the site's Chinese title, or any happymh URL.
        if title and "嗨皮漫画" in title:
            return True
        if title and "happymh" in page.url:
            return True

        time.sleep(1)

    print(" CF timed out.")
    return False
|
||||
|
||||
|
||||
def fetch_chapters_via_api(page, slug):
    """Get full chapter list via chapterByPage API with pagination.

    The fetch() runs inside the page so the Cloudflare clearance cookies
    apply. Returns a list of {id, chapterName} dicts on success, or None
    when the API produced nothing (caller falls back to DOM scraping).
    """
    # Pagination is capped at 30 pages; the response's `total` field
    # stops the loop early once every chapter has been collected.
    result = page.evaluate("""
        async (slug) => {
            const all = [];
            let total = 0;
            for (let p = 1; p <= 30; p++) {
                const url = `/v2.0/apis/manga/chapterByPage?code=${slug}&lang=cn&order=asc&page=${p}&_t=${Date.now()}`;
                try {
                    const ctrl = new AbortController();
                    setTimeout(() => ctrl.abort(), 10000);
                    const r = await fetch(url, { signal: ctrl.signal });
                    if (!r.ok) { if (p === 1) return { error: r.status }; break; }
                    const json = await r.json();
                    if (!json.data) break;

                    total = json.data.total || total;

                    // Find chapter array in response
                    let items = null;
                    for (const val of Object.values(json.data)) {
                        if (Array.isArray(val) && val.length > 0) {
                            items = val;
                            break;
                        }
                    }
                    if (!items || items.length === 0) break;

                    for (const ch of items) {
                        all.push({
                            id: String(ch.id || ''),
                            chapterName: ch.chapterName || ch.name || '',
                        });
                    }

                    if (total && all.length >= total) break;
                } catch (e) {
                    if (p === 1) return { error: e.message };
                    break;
                }
            }
            return { chapters: all, total };
        }
    """, slug)

    if result and result.get("chapters") and len(result["chapters"]) > 0:
        chapters = result["chapters"]
        total = result.get("total", len(chapters))
        print(f" API: {len(chapters)}/{total} chapters")
        return chapters

    # An error is only reported when even page 1 failed.
    if result and result.get("error"):
        print(f" API error: {result['error']}")

    return None
|
||||
|
||||
|
||||
def fetch_chapters_from_dom(page):
    """Scrape all chapters from the MUI Drawer chapter list.
    Opens drawer, clicks 'load more' repeatedly, then scrapes.

    DOM fallback used when the chapterByPage API fails. Returns a list
    of {id, chapterName} dicts in DOM order, or None when no chapter
    links could be found.
    """
    try:
        page.wait_for_selector("a[href*='/mangaread/']", timeout=15000)
        page.wait_for_timeout(1000)
    except Exception:
        print(" No chapter links found")
        return None

    # Step 1: Open the chapter list drawer
    # (try several Chinese "expand/show all" labels, then generic classes)
    for selector in [
        "text=展开全部", "text=查看全部", "text=全部章节",
        "text=展开更多", "text=更多",
        "[class*='expand']", "[class*='more']",
    ]:
        try:
            btn = page.query_selector(selector)
            if btn and btn.is_visible():
                btn.click()
                print(" Opening chapter drawer...")
                page.wait_for_timeout(2000)
                break
        except Exception:
            continue

    # Step 2: Wait for drawer
    try:
        page.wait_for_selector(".MuiDrawer-paper", timeout=5000)
    except Exception:
        # Not fatal: later queries fall back to the whole document.
        print(" Drawer not found, using page chapters")

    # Step 3: Click sort button to get ascending order (oldest first)
    try:
        sort_btn = page.query_selector("text=点我改变排序")
        if sort_btn and sort_btn.is_visible():
            sort_btn.click()
            print(" Sorting ascending...")
            page.wait_for_timeout(2000)
    except Exception:
        pass

    # Step 4: Click "点我加载更多" until all chapters loaded
    # Get expected total from header "共177个章节"
    total = page.evaluate("""
        () => {
            const spans = document.querySelectorAll('.MuiDrawer-paper span');
            for (const s of spans) {
                const m = s.textContent.match(/共(\\d+)个章节/);
                if (m) return parseInt(m[1]);
            }
            return 0;
        }
    """)
    if total:
        print(f" Total chapters: {total}")

    # Up to 50 "load more" rounds; stop early once the link count
    # reaches the advertised total or no button remains.
    for round_num in range(50):
        count = page.evaluate(
            "document.querySelectorAll('.MuiDrawer-paper a[href*=\"/mangaread/\"]').length"
        )
        if total and count >= total:
            break
        print(f" Loading... {count}/{total or '?'}", end="\r")

        # Find and click the "load more" element — search fresh each time
        clicked = page.evaluate("""
            () => {
                const walker = document.createTreeWalker(
                    document.querySelector('.MuiDrawer-paper') || document.body,
                    NodeFilter.SHOW_TEXT
                );
                while (walker.nextNode()) {
                    if (walker.currentNode.textContent.includes('加载更多')) {
                        let el = walker.currentNode.parentElement;
                        while (el && el.tagName !== 'LI') el = el.parentElement;
                        if (el) { el.click(); return true; }
                        walker.currentNode.parentElement.click();
                        return true;
                    }
                }
                return false;
            }
        """)
        if not clicked:
            break
        page.wait_for_timeout(1000)

    count = page.evaluate(
        "document.querySelectorAll('.MuiDrawer-paper a[href*=\"/mangaread/\"]').length"
    )
    print(f" Loaded {count} chapters" + " " * 20)

    # Step 5: Scrape chapters from the drawer
    # (chapter id is the numeric path segment after /mangaread/<slug>/;
    # '开始阅读' is the "start reading" button, not a chapter)
    chapters = page.evaluate("""
        () => {
            const drawer = document.querySelector('.MuiDrawer-paper');
            const container = drawer || document;
            const links = container.querySelectorAll('a[href*="/mangaread/"]');
            const chapters = [];
            const seen = new Set();
            links.forEach(a => {
                const href = a.getAttribute('href');
                const match = href.match(/\\/mangaread\\/[^/]+\\/(\\d+)/);
                if (match && !seen.has(match[1])) {
                    seen.add(match[1]);
                    const name = a.textContent.trim();
                    if (name && name !== '开始阅读') {
                        chapters.push({ id: match[1], chapterName: name });
                    }
                }
            });
            return chapters;
        }
    """)

    # Step 6: Close drawer
    try:
        page.keyboard.press("Escape")
        page.wait_for_timeout(500)
    except Exception:
        pass

    return chapters if chapters else None
|
||||
|
||||
|
||||
def fetch_metadata(page):
    """Extract manga metadata and cover URL from the loaded page.

    Scrapes the rendered HTML with regexes (the mobile site uses stable
    mg-* class names) and returns a dict shaped like detail.json:
    mg-url, mg-title, mg-author, mg-genres, mg-description, mg-cover —
    each key present only when found.
    """
    html_text = page.content()
    metadata = {"mg-url": page.url}

    m = re.search(r'<h2 class="mg-title">(.*?)</h2>', html_text)
    if m:
        metadata["mg-title"] = m.group(1).strip()

    # Author is the first link inside the sub-title paragraph.
    m = re.search(r'<p class="mg-sub-title"><a[^>]*>(.*?)</a>', html_text)
    if m:
        metadata["mg-author"] = m.group(1).strip()

    # Genres: every link inside the first mg-cate paragraph.
    genre_matches = re.findall(r'<p class="mg-cate">.*?</p>', html_text, re.DOTALL)
    if genre_matches:
        genres = re.findall(r'<a[^>]*>(.*?)</a>', genre_matches[0])
        metadata["mg-genres"] = genres

    m = re.search(r'<div class="mg-desc">.*?<p[^>]*>(.*?)</p>', html_text, re.DOTALL)
    if m:
        metadata["mg-description"] = m.group(1).strip()

    # Extract cover image URL
    cover_url = page.evaluate("""
        () => {
            // Try og:image meta tag
            const og = document.querySelector('meta[property="og:image"]');
            if (og) return og.content;
            // Try common cover selectors
            const selectors = ['img.mg-cover', '.mg-cover img', '.cover img', 'img[src*="mcover"]'];
            for (const sel of selectors) {
                const img = document.querySelector(sel);
                if (img && img.src) return img.src;
            }
            return null;
        }
    """)
    if cover_url:
        metadata["mg-cover"] = cover_url

    return metadata
|
||||
|
||||
|
||||
def get_chapter_images(page, slug, chapter_id):
    """Navigate to reader page, intercept the API response for image URLs.

    Attaches a response listener before navigating, waits for the
    /apis/manga/reading response to yield the scan list, and falls back
    to scraping <img> tags from the DOM. Returns a list of
    {url, no_referrer} dicts (possibly empty).
    """
    captured_images = []
    api_info = {"found": False, "error": None}

    def on_response(response):
        # Only the chapter-reading API carries the page-image list.
        if "/apis/manga/reading" not in response.url:
            return
        api_info["found"] = True
        if response.status != 200:
            api_info["error"] = f"status {response.status}"
            return
        try:
            data = response.json()
            scans = data.get("data", {}).get("scans", [])
            # "scans" is sometimes double-encoded as a JSON string.
            if isinstance(scans, str):
                scans = json.loads(scans)
            for scan in scans:
                if isinstance(scan, dict) and "url" in scan:
                    captured_images.append({
                        "url": scan["url"],
                        # "r" flag marks images that must be fetched
                        # without a Referer header.
                        "no_referrer": scan.get("r", 0) != 0,
                    })
        except Exception as e:
            api_info["error"] = str(e)

    page.on("response", on_response)

    reader_url = f"{BASE_URL}/mangaread/{slug}/{chapter_id}"
    print(" Loading reader...")
    # Navigate via location.href rather than page.goto so an aborted
    # navigation (e.g. CF interstitial) doesn't raise.
    try:
        page.evaluate(f"window.location.href = '{reader_url}'")
    except Exception:
        pass

    time.sleep(2)

    # Neutralize window.close so the page can't shut our tab.
    try:
        page.evaluate("window.close = () => {}")
    except Exception:
        pass

    print(" Waiting for page...")
    if not wait_for_cloudflare(page, timeout=90):
        page.remove_listener("response", on_response)
        return []

    print(" Waiting for API...")
    # Poll up to 20s for the listener to capture the scan list.
    deadline = time.time() + 20
    while time.time() < deadline:
        if captured_images:
            break
        try:
            page.wait_for_timeout(500)
        except Exception:
            break

    page.remove_listener("response", on_response)

    if not api_info["found"]:
        print(" API not intercepted")
    elif api_info["error"]:
        print(f" API: {api_info['error']}")

    # DOM fallback
    # (filter out covers, CF assets and SVG placeholders)
    if not captured_images:
        try:
            page.wait_for_timeout(3000)
            dom_images = page.evaluate("""
                () => {
                    const imgs = document.querySelectorAll('img[src*="http"]');
                    const urls = [];
                    const seen = new Set();
                    imgs.forEach(img => {
                        const src = img.src || '';
                        if (src && !seen.has(src) && !src.includes('/mcover/')
                            && !src.includes('cloudflare') && !src.includes('.svg')) {
                            seen.add(src);
                            urls.push(src);
                        }
                    });
                    return urls;
                }
            """)
            if dom_images:
                print(f" DOM: {len(dom_images)} images")
                for u in dom_images:
                    captured_images.append({"url": u, "no_referrer": False})
        except Exception as e:
            print(f" DOM failed: {e}")

    return captured_images
|
||||
|
||||
|
||||
def download_image(page, img, save_path):
    """Download image via browser network stack. Captures raw bytes via CDP — no base64.

    Triggers an in-page fetch() (so Cloudflare cookies and the required
    referrer policy apply) and reads the raw response body. Returns True
    on success or when the target file already exists; False otherwise.
    """
    if save_path.exists():
        return True

    url = img["url"]
    # Some image hosts reject hotlinks unless the referrer is omitted.
    ref_policy = "no-referrer" if img.get("no_referrer") else "origin"

    try:
        with page.expect_response(lambda r: url in r.url, timeout=15000) as resp_info:
            page.evaluate(
                "([u, r]) => fetch(u, { referrerPolicy: r })",
                [url, ref_policy],
            )
        response = resp_info.value
        if response.status == 200:
            body = response.body()  # raw bytes from network layer
            # Bodies under ~100 bytes are error pages, not images.
            if body and len(body) > 100:
                save_path.parent.mkdir(parents=True, exist_ok=True)
                save_path.write_bytes(body)
                return True
    except Exception as e:
        # Log only the first failure (function attribute as a one-shot
        # flag) to avoid flooding the console on flaky chapters.
        if not hasattr(download_image, "_err_logged"):
            download_image._err_logged = True
            print(f"\n First error: {e}")
        return False

    return False
|
||||
|
||||
|
||||
def get_existing_chapters(manga_dir):
    """Return the names of chapter subdirectories that already hold
    at least one downloaded .jpg page."""
    if not manga_dir.exists():
        return set()
    # A chapter only counts as downloaded when it contains an image.
    return {
        entry.name
        for entry in manga_dir.iterdir()
        if entry.is_dir() and any(entry.glob("*.jpg"))
    }
|
||||
|
||||
|
||||
def download_manga(page, manga_url):
    """Download all chapters using a single page.

    Pipeline: load the manga page (capturing /mcover/ traffic for the
    cover), list chapters via API or DOM, persist detail.json and
    cover.jpg, then download each not-yet-present chapter into
    manga-content/<slug>/<n> <title>/.
    """
    # Slug is the last path segment of the manga URL (directory name).
    slug = urlparse(manga_url).path.strip("/").split("/")[-1]
    manga_dir = CONTENT_DIR / slug

    print(f"\n{'='*60}")
    print(f"Manga: {slug}")
    print(f"{'='*60}")

    # Intercept all cover images from page load traffic
    cover_responses = {}

    def on_manga_response(response):
        if "/mcover/" in response.url and response.status == 200:
            try:
                cover_responses[response.url] = response.body()
            except Exception:
                pass

    page.on("response", on_manga_response)

    print("Loading manga page...")
    # wait_until="commit": don't block on full load; CF handling follows.
    try:
        page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
    except Exception:
        pass
    if not wait_for_cloudflare(page):
        page.remove_listener("response", on_manga_response)
        return

    print("Fetching chapters via API...")
    chapters = fetch_chapters_via_api(page, slug)
    if not chapters:
        print(" API failed, trying DOM...")
        chapters = fetch_chapters_from_dom(page)
    if not chapters:
        print("No chapters found.")
        return

    print(f"Found {len(chapters)} chapters")

    # Merge scraped metadata into any existing detail.json.
    metadata = fetch_metadata(page)
    manga_dir.mkdir(parents=True, exist_ok=True)
    detail_path = manga_dir / "detail.json"
    if metadata:
        existing_meta = {}
        if detail_path.exists():
            try:
                existing_meta = json.loads(detail_path.read_text(encoding="utf-8"))
            except json.JSONDecodeError:
                pass
        existing_meta.update(metadata)
        detail_path.write_text(
            json.dumps(existing_meta, ensure_ascii=False, indent=4),
            encoding="utf-8",
        )

    # Save cover image — match the correct one from DOM
    page.remove_listener("response", on_manga_response)
    cover_path = manga_dir / "cover.jpg"
    if not cover_path.exists():
        # Find the actual cover URL from the first mip-fill-content img
        cover_url = page.evaluate("""
            () => {
                const img = document.querySelector('img.mip-fill-content[src*="mcover"]');
                return img ? img.src : null;
            }
        """)
        cover_body = None
        if cover_url:
            # Exact match first
            cover_body = cover_responses.get(cover_url)
            # Substring match fallback
            # (query strings may differ between DOM src and network URL)
            if not cover_body:
                for url, data in cover_responses.items():
                    if cover_url.split("?")[0] in url or url.split("?")[0] in cover_url:
                        cover_body = data
                        break

        if cover_body and len(cover_body) > 100:
            cover_path.write_bytes(cover_body)
            print(f"Cover saved ({len(cover_body)} bytes)")
        else:
            print(f"Cover not found (captured {len(cover_responses)} mcover images, target: {cover_url})")

    existing_chapters = get_existing_chapters(manga_dir)

    # Chapters are already in DOM order (ascending from drawer)
    chapters_sorted = chapters

    for i, chapter in enumerate(chapters_sorted, 1):
        ch_id = chapter["id"]
        ch_name = chapter["chapterName"]
        folder_name = f"{i} {ch_name}"

        # Skip if this chapter already downloaded (check by chapter name)
        already = any(ch_name in name for name in existing_chapters)
        if already:
            print(f" [{i}/{len(chapters_sorted)}] {ch_name} — skip")
            continue

        print(f" [{i}/{len(chapters_sorted)}] {ch_name} (id={ch_id})")

        images = get_chapter_images(page, slug, ch_id)
        if not images:
            print(f" No images")
            continue

        print(f" {len(images)} pages")
        chapter_dir = manga_dir / folder_name
        chapter_dir.mkdir(parents=True, exist_ok=True)

        # Download images via browser network stack (raw bytes, no base64)
        ok = 0
        failed = []
        for pn, img in enumerate(images, 1):
            save_path = chapter_dir / f"{pn}.jpg"
            if download_image(page, img, save_path):
                ok += 1
                print(f" {pn}/{len(images)}", end="\r")
            else:
                failed.append((pn, img))
            time.sleep(0.1)

        # Retry failed images once
        if failed:
            time.sleep(1)
            for pn, img in failed:
                save_path = chapter_dir / f"{pn}.jpg"
                if download_image(page, img, save_path):
                    ok += 1
                else:
                    print(f" {pn}/{len(images)} FAIL")
                time.sleep(0.3)

        print(f" {ok}/{len(images)} downloaded" + " " * 20)

        # Remove the empty directory so the chapter is retried next run.
        if ok == 0:
            try:
                chapter_dir.rmdir()
            except Exception:
                pass

        time.sleep(REQUEST_DELAY)

    print(f"\nDone: {slug}")
|
||||
|
||||
|
||||
def setup_mode():
    """Launch Chrome for manual CF solving.

    Opens Chrome on the site, waits for the user to pass the Cloudflare
    challenge by hand, then connects over CDP once just to confirm the
    cf_clearance cookie landed in the persistent profile.
    """
    print("=== SETUP ===")
    print("Chrome will open. Do this:")
    print(" 1. Go to m.happymh.com — solve Cloudflare")
    print(" 2. Open a manga page — solve CF if prompted")
    print(" 3. Open a chapter reader — solve CF if prompted")
    print(" 4. Press ENTER here when done\n")

    chrome_proc = launch_chrome(BASE_URL)

    input(">>> Press ENTER when Cloudflare is solved... ")

    # Verification is best-effort; failures only print a warning.
    try:
        with sync_playwright() as p:
            browser = p.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
            ctx = browser.contexts[0]
            cookies = ctx.cookies()
            cf = [c for c in cookies if c["name"] == "cf_clearance"]
            if cf:
                print("cf_clearance found!")
            else:
                print("Warning: cf_clearance not found")
            browser.close()
    except Exception as e:
        print(f"Could not verify: {e}")

    # chrome_proc is None when Chrome was already running — leave it be.
    if chrome_proc:
        chrome_proc.terminate()

    print("Done. Now run: python download.py")
|
||||
|
||||
|
||||
def main():
    """Entry point.

    With --setup: run the interactive Cloudflare-solving flow and exit.
    Otherwise: read the URL list from manga.json and download each manga
    through one shared browser page connected over CDP.
    """
    if "--setup" in sys.argv:
        setup_mode()
        return

    if not MANGA_JSON.exists():
        print(f"Error: {MANGA_JSON} not found")
        sys.exit(1)

    manga_urls = json.loads(MANGA_JSON.read_text(encoding="utf-8"))
    if not isinstance(manga_urls, list) or not manga_urls:
        print("Error: manga.json should be a JSON array of URLs")
        sys.exit(1)

    print(f"Found {len(manga_urls)} manga(s)")
    print("Launching Chrome...\n")

    chrome_proc = launch_chrome()

    try:
        with sync_playwright() as p:
            browser = p.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
            context = browser.contexts[0]
            # Reuse the existing tab when there is one.
            page = context.pages[0] if context.pages else context.new_page()

            for url in manga_urls:
                try:
                    download_manga(page, url)
                except Exception as e:
                    # One failing manga must not abort the whole run.
                    print(f"\nError: {url}: {e}")
                    import traceback
                    traceback.print_exc()

            browser.close()
    finally:
        # Only terminate the Chrome we spawned (None if it pre-existed).
        if chrome_proc:
            chrome_proc.terminate()

    print("\nAll done!")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
92
export_cookies.py
Normal file
92
export_cookies.py
Normal file
@ -0,0 +1,92 @@
|
||||
"""
|
||||
Opens a browser to m.happymh.com, waits for you to pass Cloudflare,
|
||||
then saves cookies to cookies.txt in Netscape format.
|
||||
|
||||
Install:
|
||||
pip install playwright
|
||||
playwright install chromium
|
||||
|
||||
Usage:
|
||||
python export_cookies.py
|
||||
"""
|
||||
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
try:
|
||||
from playwright.sync_api import sync_playwright
|
||||
except ImportError:
|
||||
print("Playwright not installed. Run:")
|
||||
print(" pip install playwright")
|
||||
print(" playwright install chromium")
|
||||
raise SystemExit(1)
|
||||
|
||||
COOKIES_FILE = Path(__file__).parent / "cookies.txt"
|
||||
TARGET_URL = "https://m.happymh.com"
|
||||
|
||||
|
||||
def cookies_to_netscape(cookies):
    """Convert Playwright cookies to Netscape cookies.txt format.

    Each line is tab-separated: domain, subdomain flag, path, secure,
    expiry (unix seconds), name, value. Playwright reports session
    cookies with expires == -1, but the Netscape format uses 0 for
    session cookies — negative expiries are clamped to 0 so parsers
    like curl/wget accept the file.
    """
    lines = ["# Netscape HTTP Cookie File", ""]
    for c in cookies:
        domain = c["domain"]
        # Netscape format: leading dot means accessible to subdomains
        if not domain.startswith("."):
            domain = "." + domain
        flag = "TRUE"  # accessible to subdomains
        path = c.get("path", "/")
        secure = "TRUE" if c.get("secure", False) else "FALSE"
        # Clamp Playwright's -1 (session cookie) to the Netscape 0.
        expires = str(max(0, int(c.get("expires", 0))))
        name = c["name"]
        value = c["value"]
        lines.append(f"{domain}\t{flag}\t{path}\t{secure}\t{expires}\t{name}\t{value}")
    return "\n".join(lines) + "\n"
|
||||
|
||||
|
||||
def main():
    """Open a browser on the site, let the user pass Cloudflare, then
    dump the context cookies to cookies.txt in Netscape format."""
    print("Opening browser to m.happymh.com...")
    print("Once the page loads (past Cloudflare), press ENTER here to save cookies.\n")

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=False)
        # Emulate an iPhone — the target site is mobile-oriented.
        context = browser.new_context(
            user_agent=(
                "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) "
                "AppleWebKit/605.1.15 (KHTML, like Gecko) "
                "Version/16.0 Mobile/15E148 Safari/604.1"
            ),
            viewport={"width": 390, "height": 844},
            is_mobile=True,
        )

        page = context.new_page()
        page.goto(TARGET_URL)

        input(">>> Page opened. Pass Cloudflare if needed, then press ENTER to save cookies... ")

        cookies = context.cookies()
        if not cookies:
            print("No cookies found!")
            browser.close()
            return

        # Check for cf_clearance
        # (without it, downloads will hit the challenge page again)
        cookie_names = [c["name"] for c in cookies]
        if "cf_clearance" in cookie_names:
            print("cf_clearance cookie found (Cloudflare passed)")
        else:
            print("Warning: cf_clearance not found. You may still be on the challenge page.")
            answer = input("Save anyway? [y/N] ").strip().lower()
            if answer != "y":
                browser.close()
                return

        text = cookies_to_netscape(cookies)
        COOKIES_FILE.write_text(text)
        print(f"\nSaved {len(cookies)} cookies to {COOKIES_FILE}")

        browser.close()
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
4
manga.json
Normal file
4
manga.json
Normal file
@ -0,0 +1,4 @@
|
||||
[
|
||||
"https://m.happymh.com/manga/moutianchengweimoshen",
|
||||
"https://m.happymh.com/manga/butiange"
|
||||
]
|
||||
393
upload.py
Normal file
393
upload.py
Normal file
@ -0,0 +1,393 @@
|
||||
"""
|
||||
Interactive manga uploader — Cloudflare R2 + PostgreSQL.
|
||||
|
||||
R2 storage layout:
|
||||
manga/<slug>/cover.webp
|
||||
manga/<slug>/chapters/<number>/<page>.webp
|
||||
|
||||
Usage:
|
||||
python upload.py
|
||||
"""
|
||||
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from pathlib import Path
|
||||
|
||||
import boto3
|
||||
import psycopg2
|
||||
from PIL import Image
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
ROOT_DIR = Path(__file__).parent
|
||||
CONTENT_DIR = ROOT_DIR / "manga-content"
|
||||
|
||||
# R2 config
|
||||
s3 = boto3.client(
|
||||
"s3",
|
||||
endpoint_url=f"https://{os.environ['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com",
|
||||
aws_access_key_id=os.environ["R2_ACCESS_KEY"],
|
||||
aws_secret_access_key=os.environ["R2_SECRET_KEY"],
|
||||
region_name="auto",
|
||||
)
|
||||
BUCKET = os.environ["R2_BUCKET"]
|
||||
PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/")
|
||||
|
||||
# Database
|
||||
DATABASE_URL = os.environ["DATABASE_URL"]
|
||||
UPLOAD_WORKERS = 8
|
||||
|
||||
|
||||
def convert_to_webp(image_path, quality=80):
    """Re-encode the image at *image_path* and return WEBP bytes."""
    buffer = io.BytesIO()
    Image.open(image_path).save(buffer, format="WEBP", quality=quality)
    return buffer.getvalue()
|
||||
|
||||
|
||||
def make_cover(image_path, width=400, height=560):
    """Crop and resize an image into a width x height WEBP cover.

    Images wider than the target ratio are center-cropped horizontally;
    taller images keep the top band. The result is resized with Lanczos
    and encoded as WEBP (quality 85).
    """
    img = Image.open(image_path)
    target_ratio = width / height
    if img.width / img.height > target_ratio:
        # Too wide: trim equal slices off both sides.
        crop_width = int(img.height * target_ratio)
        x0 = (img.width - crop_width) // 2
        img = img.crop((x0, 0, x0 + crop_width, img.height))
    else:
        # Too tall: keep the top portion.
        crop_height = int(img.width / target_ratio)
        img = img.crop((0, 0, img.width, crop_height))
    img = img.resize((width, height), Image.LANCZOS)
    encoded = io.BytesIO()
    img.save(encoded, format="WEBP", quality=85)
    return encoded.getvalue()
|
||||
|
||||
|
||||
def upload_to_r2(key, data, content_type="image/webp"):
    """Upload *data* to the R2 bucket under *key*; return its public URL."""
    s3.put_object(
        Bucket=BUCKET,
        Key=key,
        Body=data,
        ContentType=content_type,
    )
    public_url = f"{PUBLIC_URL}/{key}"
    return public_url
|
||||
|
||||
|
||||
def r2_key_exists(key):
    """Return True when *key* is already present in the R2 bucket."""
    try:
        # head_object raises ClientError (e.g. 404) for missing keys.
        s3.head_object(Bucket=BUCKET, Key=key)
    except s3.exceptions.ClientError:
        return False
    return True
|
||||
|
||||
|
||||
def get_db():
    """Open a PostgreSQL connection with UTF-8 client encoding."""
    connection = psycopg2.connect(DATABASE_URL)
    connection.set_client_encoding("UTF8")
    return connection
|
||||
|
||||
|
||||
def parse_chapter_dir(dir_name):
    """Split a chapter folder name into (number, title).

    '1 001. 序章' -> (1, '001. 序章'). Names without a leading number
    fall back to (0, <original name>).
    """
    match = re.match(r"^(\d+)\s+(.+)$", dir_name)
    if match is None:
        return 0, dir_name
    number, title = match.groups()
    return int(number), title
|
||||
|
||||
|
||||
def list_local_manga():
    """List manga directory names under manga-content/, sorted,
    excluding hidden directories."""
    names = [
        entry.name
        for entry in CONTENT_DIR.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    ]
    names.sort()
    return names
|
||||
|
||||
|
||||
# ── Commands ──────────────────────────────────────────────
|
||||
|
||||
|
||||
def cmd_reset():
    """Clear all R2 storage.

    Collects every object key via the list_objects_v2 paginator, then
    issues one delete_objects call per listed page, in parallel.
    """
    print("\nClearing R2 bucket...")
    total = 0
    batches = []
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=BUCKET):
        objects = page.get("Contents", [])
        if not objects:
            break
        # delete_objects accepts the same page-sized batches we listed.
        batches.append([{"Key": obj["Key"]} for obj in objects])

    # Delete batches in parallel
    def delete_batch(keys):
        s3.delete_objects(Bucket=BUCKET, Delete={"Objects": keys})
        return len(keys)

    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for count in pool.map(delete_batch, batches):
            total += count
            print(f" {total} deleted", end="\r")

    print(f" {total} objects deleted from R2" + " " * 10)
    print("R2 cleared. Run 'upload' to re-upload.\n")
|
||||
|
||||
|
||||
def cmd_status(conn):
    """Show current state of R2 and database.

    Prints row counts for the Manga/Chapter/Page tables, a per-manga
    chapter count from the DB, the total R2 object count, and what is
    present locally under manga-content/.
    """
    cur = conn.cursor()

    # DB counts
    cur.execute('SELECT COUNT(*) FROM "Manga"')
    manga_count = cur.fetchone()[0]
    cur.execute('SELECT COUNT(*) FROM "Chapter"')
    chapter_count = cur.fetchone()[0]
    cur.execute('SELECT COUNT(*) FROM "Page"')
    page_count = cur.fetchone()[0]

    print(f"\n Database: {manga_count} manga, {chapter_count} chapters, {page_count} pages")

    # List manga in DB
    cur.execute('SELECT slug, title, (SELECT COUNT(*) FROM "Chapter" WHERE "mangaId" = "Manga".id) FROM "Manga" ORDER BY slug')
    for slug, title, ch_count in cur.fetchall():
        print(f" {slug}: {title} ({ch_count} chapters)")

    # R2 count
    total = 0
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=BUCKET):
        total += len(page.get("Contents", []))
    print(f" R2: {total} objects")

    # Local
    local = list_local_manga()
    print(f" Local: {len(local)} manga in manga-content/")
    for name in local:
        manga_path = CONTENT_DIR / name
        chapters = [d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")]
        has_cover = (manga_path / "cover.jpg").exists()
        print(f" {name}: {len(chapters)} chapters, cover: {'yes' if has_cover else 'no'}")
    print()
|
||||
|
||||
|
||||
def cmd_upload(conn, manga_name=None):
    """Upload manga to R2 and create DB records."""
    # A single explicit name must exist on disk; otherwise take every local manga.
    if manga_name:
        if not (CONTENT_DIR / manga_name).is_dir():
            print(f" Not found: {CONTENT_DIR / manga_name}")
            return
        targets = [manga_name]
    else:
        targets = list_local_manga()

    if not targets:
        print(" No manga found in manga-content/")
        return

    print(f"\n Uploading {len(targets)} manga(s)...")
    for target in targets:
        upload_manga(target, conn)
    print("\nUpload complete!")
|
||||
|
||||
|
||||
def upload_manga(manga_name, conn):
    """Upload one local manga (cover + chapter pages) to R2 and mirror it in the DB.

    Reads ``manga-content/<manga_name>/detail.json`` for metadata, converts
    images to WebP, uploads them in parallel, and inserts Manga/Chapter/Page
    rows. Chapters already recorded in the DB (matched by chapter number) and
    objects already present in R2 are skipped, so re-running is safe.

    Args:
        manga_name: folder name under CONTENT_DIR; doubles as the URL slug.
        conn: open PostgreSQL connection (committed per chapter).
    """
    manga_path = CONTENT_DIR / manga_name
    detail_path = manga_path / "detail.json"

    if not detail_path.exists():
        print(f" Skipping {manga_name}: no detail.json")
        return

    detail = json.loads(detail_path.read_text(encoding="utf-8"))
    title = detail.get("mg-title", manga_name)
    slug = manga_name  # folder name doubles as the DB slug
    genres = detail.get("mg-genres", [])
    description = detail.get("mg-description", "")
    if not description and genres:
        # Fall back to a genre list when the source had no synopsis.
        description = f"Genres: {', '.join(genres)}"
    genre = genres[0] if genres else "Drama"  # DB column holds a single genre

    print(f"\n {'='*50}")
    print(f" {title} ({slug})")
    print(f" {'='*50}")

    cur = conn.cursor()

    # Cover: convert/upload once; reuse the existing R2 object on re-runs.
    cover_file = manga_path / "cover.jpg"
    cover_url = ""
    cover_key = f"manga/{slug}/cover.webp"

    if cover_file.exists():
        if not r2_key_exists(cover_key):
            cover_data = make_cover(cover_file)
            cover_url = upload_to_r2(cover_key, cover_data)
            print(" Cover uploaded")  # was f-string with no placeholders
        else:
            cover_url = f"{PUBLIC_URL}/{cover_key}"
            print(" Cover exists")  # was f-string with no placeholders
    else:
        print(" No cover.jpg")

    # Manga record: refresh the cover on an existing row, else insert a new one.
    cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
    row = cur.fetchone()

    if row:
        manga_id, existing_cover = row
        print(f" Manga exists (id: {manga_id})")
        if cover_url and cover_url != existing_cover:
            cur.execute(
                'UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                (cover_url, manga_id),
            )
            conn.commit()
    else:
        cur.execute(
            """
            INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt")
            VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW())
            RETURNING id
            """,
            (title, description, cover_url, slug, genre),
        )
        manga_id = cur.fetchone()[0]
        conn.commit()
        print(f" Created manga (id: {manga_id})")

    def _numeric_stem(f):
        # Sort key: first run of digits in the filename stem, 0 if none.
        # Single regex pass per file (the original evaluated re.search twice).
        m = re.search(r"(\d+)", f.stem)
        return int(m.group(1)) if m else 0

    # Chapters, ordered by the numeric prefix of the folder name.
    chapter_dirs = sorted(
        [d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")],
        key=lambda d: parse_chapter_dir(d.name)[0],
    )

    for chapter_dir in chapter_dirs:
        order_num, chapter_title = parse_chapter_dir(chapter_dir.name)
        if order_num == 0:
            # presumably 0 means the folder name didn't parse — TODO confirm
            # against parse_chapter_dir.
            continue

        # Skip chapters already recorded for this manga (idempotent re-runs).
        cur.execute(
            'SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s',
            (manga_id, order_num),
        )
        if cur.fetchone():
            print(f" [{order_num}] {chapter_title} — skip")
            continue

        page_files = sorted(
            [f for f in chapter_dir.iterdir() if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")],
            key=_numeric_stem,
        )

        if not page_files:
            continue

        print(f" [{order_num}] {chapter_title} ({len(page_files)} pages)")

        cur.execute(
            'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
            (manga_id, order_num, chapter_title),
        )
        chapter_id = cur.fetchone()[0]
        conn.commit()

        # Convert pages to WebP and upload in parallel; keys already in R2
        # are not re-uploaded, only their public URL is reconstructed.
        def process_page(args):
            j, page_file = args
            r2_key = f"manga/{slug}/chapters/{order_num}/{j}.webp"
            if not r2_key_exists(r2_key):
                webp_data = convert_to_webp(page_file)
                return j, upload_to_r2(r2_key, webp_data)
            return j, f"{PUBLIC_URL}/{r2_key}"

        page_urls = {}
        done = 0
        with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
            futures = {pool.submit(process_page, (j, f)): j for j, f in enumerate(page_files, 1)}
            for future in as_completed(futures):
                j, url = future.result()
                page_urls[j] = url
                done += 1
                print(f" {done}/{len(page_files)}", end="\r")

        # Insert Page rows in page order, then commit the chapter as a unit.
        for j in sorted(page_urls):
            cur.execute(
                'INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)',
                (chapter_id, j, page_urls[j]),
            )

        conn.commit()
        print(f" {len(page_files)} pages uploaded" + " " * 10)
|
||||
|
||||
|
||||
# ── Interactive loop ──────────────────────────────────────
|
||||
|
||||
|
||||
def show_menu():
    """Print the interactive menu banner and option list."""
    banner = "=" * 40
    lines = (
        "",
        banner,
        " Manga Uploader",
        banner,
        " 1. Status",
        " 2. Upload all manga",
        " 3. Upload specific manga",
        " 4. Reset R2 storage",
        " 0. Quit",
        "",
    )
    for line in lines:
        print(line)
|
||||
|
||||
|
||||
def main():
    """Run the interactive menu loop until the user quits or sends EOF."""

    def choose_and_upload(db_conn):
        # Choice 3: let the user pick one local manga by list index.
        names = list_local_manga()
        if not names:
            print(" No manga in manga-content/")
            return
        print()
        for pos, entry in enumerate(names, 1):
            print(f" {pos}. {entry}")
        print()
        raw = input("Select manga number: ").strip()
        try:
            chosen = int(raw) - 1
            if 0 <= chosen < len(names):
                cmd_upload(db_conn, names[chosen])
            else:
                print(" Invalid selection")
        except ValueError:
            print(" Invalid input")

    conn = get_db()
    try:
        while True:
            show_menu()
            try:
                selection = input("Select [0-4]: ").strip()
            except (EOFError, KeyboardInterrupt):
                # Ctrl-D / Ctrl-C at the prompt exits cleanly.
                print()
                break

            if selection == "0":
                break
            if selection == "1":
                cmd_status(conn)
            elif selection == "2":
                cmd_upload(conn)
            elif selection == "3":
                choose_and_upload(conn)
            elif selection == "4":
                answer = input(" Delete ALL R2 objects? [y/N] ").strip().lower()
                if answer == "y":
                    cmd_reset()
                else:
                    print(" Cancelled.")
            else:
                print(" Invalid choice")
    finally:
        conn.close()

    print("Bye!")
|
||||
|
||||
|
||||
# Entry point: launch the interactive uploader menu when run as a script.
if __name__ == "__main__":
    main()
|
||||
Loading…
x
Reference in New Issue
Block a user