- Single interactive script (arrow-key TUI via simple-term-menu) replaces download.py, upload.py, and export_cookies.py - Add sync command: streams new chapters site -> R2 directly without saving locally (uses RAM as cache) - Add R2/DB management submenu (status, delete specific, clear all) - Multi-select chapter picker with already-downloaded marked grayed out - Chapter list fetched via /v2.0/apis/manga/chapterByPage with pagination - Cover image captured from page network traffic (no extra fetch) - Filter prefetched next-chapter images via DOM container count - Chrome runs hidden via AppleScript on macOS (except setup mode) - DB records only created after R2 upload succeeds (no orphan rows) - Parallel R2 uploads (8 workers) with WebP method=6 quality=75 - Update CLAUDE.md to reflect new architecture - Add requirements.txt Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1482 lines
49 KiB
Python
1482 lines
49 KiB
Python
"""
|
|
Manga toolkit — download from m.happymh.com, upload to Cloudflare R2.
|
|
|
|
Usage:
|
|
python manga.py
|
|
"""
|
|
|
|
import io
|
|
import json
|
|
import os
|
|
import re
|
|
import time
|
|
import socket
|
|
import subprocess
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
|
|
import boto3
|
|
import psycopg2
|
|
from PIL import Image
|
|
from dotenv import load_dotenv
|
|
from playwright.sync_api import sync_playwright
|
|
from simple_term_menu import TerminalMenu
|
|
|
|
load_dotenv()
|
|
|
|
# ── Config ─────────────────────────────────────────────────

BASE_URL = "https://m.happymh.com"          # scrape target site
ROOT_DIR = Path(__file__).parent
CONTENT_DIR = ROOT_DIR / "manga-content"    # local download root
MANGA_JSON = ROOT_DIR / "manga.json"        # JSON list of manga URLs to process
BROWSER_DATA = ROOT_DIR / ".browser-data"   # persistent Chrome profile (keeps CF cookies)
CDP_PORT = 9333                             # Chrome DevTools Protocol port
REQUEST_DELAY = 1.5                         # polite delay between chapters (seconds)
UPLOAD_WORKERS = 8                          # parallel R2 upload threads
CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"

# R2
# S3-compatible client pointed at the Cloudflare R2 endpoint.
s3 = boto3.client(
    "s3",
    endpoint_url=f"https://{os.environ['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com",
    aws_access_key_id=os.environ["R2_ACCESS_KEY"],
    aws_secret_access_key=os.environ["R2_SECRET_KEY"],
    region_name="auto",
)
BUCKET = os.environ["R2_BUCKET"]
PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/")  # normalized: no trailing slash
DATABASE_URL = os.environ["DATABASE_URL"]
|
|
|
|
|
|
# ── Chrome management ──────────────────────────────────────
|
|
|
|
|
|
def hide_chrome():
    """Hide the Chrome window on macOS via System Events (best effort)."""
    script = 'tell application "System Events" to set visible of process "Google Chrome" to false'
    try:
        subprocess.Popen(
            ["osascript", "-e", script],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        # osascript missing (non-macOS) or spawn failure — cosmetic only, ignore.
        pass
|
|
|
|
|
|
def is_port_open(port):
    """Return True when something is accepting TCP connections on localhost:port."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex returns 0 on success instead of raising.
        return sock.connect_ex(("localhost", port)) == 0
    finally:
        sock.close()
|
|
|
|
|
|
def launch_chrome(start_url=None):
    """Start Chrome with a CDP debugging port and the persistent profile.

    Returns the Popen handle, or None when an instance already listens on
    CDP_PORT (reused, nothing to terminate), Chrome is missing from
    CHROME_PATH, or the port never opens within ~15 seconds.
    """
    if is_port_open(CDP_PORT):
        return None
    if not Path(CHROME_PATH).exists():
        print(f" Chrome not found at: {CHROME_PATH}")
        return None
    cmd = [
        CHROME_PATH,
        f"--remote-debugging-port={CDP_PORT}",
        f"--user-data-dir={BROWSER_DATA}",  # persistent profile keeps CF clearance cookies
        "--no-first-run",
        "--no-default-browser-check",
        "--window-position=0,0",
        "--window-size=800,600",
        "--no-focus-on-navigate",
    ]
    if start_url:
        cmd.append(start_url)
    proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    # Poll up to ~15s (30 * 0.5s) for the CDP port to come up.
    for _ in range(30):
        if is_port_open(CDP_PORT):
            time.sleep(1)  # brief grace period before hiding the window
            hide_chrome()
            return proc
        time.sleep(0.5)
    print(" Chrome failed to start")
    return None
|
|
|
|
|
|
class BrowserSession:
    """Manages Chrome + CDP lifecycle.

    start() launches (or reuses) Chrome and attaches Playwright over CDP;
    close() tears everything down in reverse order.
    """

    def __init__(self):
        self.chrome_proc = None  # Popen handle only if we launched Chrome ourselves
        self.playwright = None
        self.browser = None
        self.page = None

    def start(self):
        """Connect Playwright to Chrome over CDP, reusing the first open tab."""
        self.chrome_proc = launch_chrome()  # None if an instance was already running
        self.playwright = sync_playwright().start()
        self.browser = self.playwright.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
        context = self.browser.contexts[0]
        # Reuse an existing tab when present so the CF-cleared session carries over.
        self.page = context.pages[0] if context.pages else context.new_page()

    def close(self):
        """Disconnect the browser, stop Chrome (if ours), and stop Playwright."""
        try:
            self.browser.close()
        except Exception:
            pass  # browser may be None or already disconnected
        if self.chrome_proc:
            self.chrome_proc.terminate()
        if self.playwright:
            self.playwright.stop()
|
|
|
|
|
|
def with_browser(func):
    """Run func(session) inside a Chrome+CDP session. Returns func's result."""
    sess = BrowserSession()
    sess.start()
    try:
        result = func(sess)
    finally:
        # Always tear the session down, even when func raises.
        sess.close()
    return result
|
|
|
|
|
|
# ── Cloudflare ─────────────────────────────────────────────
|
|
|
|
|
|
def wait_for_cloudflare(session, timeout=120):
    """Wait for CF to resolve. User solves in the visible browser window.

    Polls the page title about once per second for up to `timeout` seconds.
    Returns True when the site title ("嗨皮漫画") or a happymh URL is seen,
    False on timeout.
    """
    page = session.page
    for i in range(timeout):
        try:
            title = page.title()
        except Exception:
            # Page may be mid-navigation or detached during the challenge — retry.
            time.sleep(1)
            continue
        if "Just a moment" in title or "challenge" in page.url:
            # Still on the Cloudflare interstitial; nudge the user periodically.
            if i == 0:
                print(" CF challenge — solve in browser...")
            elif i % 15 == 0:
                print(f" Still waiting for CF... ({i}s)")
            time.sleep(1)
            continue
        if title and ("嗨皮漫画" in title or "happymh" in page.url):
            return True
        time.sleep(1)
    print(" CF timed out.")
    return False
|
|
|
|
|
|
# ── Happymh: chapter fetching ─────────────────────────────
|
|
|
|
|
|
def fetch_chapters_via_api(page, slug):
    """Fetch the full chapter list via the site's paginated JSON API.

    Runs fetch() inside the page context so CF cookies apply.  Pages through
    /v2.0/apis/manga/chapterByPage (cap: 30 pages, 10s abort per request)
    until `total` chapters are collected.  Returns a list of
    {id, chapterName} dicts, or None on API error / empty result.
    """
    result = page.evaluate("""
        async (slug) => {
            const all = [];
            let total = 0;
            for (let p = 1; p <= 30; p++) {
                const url = `/v2.0/apis/manga/chapterByPage?code=${slug}&lang=cn&order=asc&page=${p}&_t=${Date.now()}`;
                try {
                    const ctrl = new AbortController();
                    setTimeout(() => ctrl.abort(), 10000);
                    const r = await fetch(url, { signal: ctrl.signal });
                    if (!r.ok) { if (p === 1) return { error: r.status }; break; }
                    const json = await r.json();
                    if (!json.data) break;
                    total = json.data.total || total;
                    let items = null;
                    for (const val of Object.values(json.data)) {
                        if (Array.isArray(val) && val.length > 0) { items = val; break; }
                    }
                    if (!items || items.length === 0) break;
                    for (const ch of items) {
                        all.push({ id: String(ch.id || ''), chapterName: ch.chapterName || ch.name || '' });
                    }
                    if (total && all.length >= total) break;
                } catch (e) {
                    if (p === 1) return { error: e.message };
                    break;
                }
            }
            return { chapters: all, total };
        }
    """, slug)
    if result and result.get("chapters") and len(result["chapters"]) > 0:
        chapters = result["chapters"]
        total = result.get("total", len(chapters))
        print(f" API: {len(chapters)}/{total} chapters")
        return chapters
    if result and result.get("error"):
        print(f" API error: {result['error']}")
    # None signals the caller to fall back to DOM scraping.
    return None
|
|
|
|
|
|
def fetch_chapters_from_dom(page):
    """Scrape the chapter list from the manga page DOM (fallback path).

    Expands the chapter drawer, flips sort order, clicks "load more" until
    the advertised total is reached, then collects /mangaread/ links.
    Returns a list of {id, chapterName} dicts or None on failure.
    """
    # Bail out early if no chapter links ever appear.
    try:
        page.wait_for_selector("a[href*='/mangaread/']", timeout=15000)
        page.wait_for_timeout(1000)
    except Exception:
        return None

    # Click whichever "expand all" style button is present (first visible wins).
    for selector in ["text=展开全部", "text=查看全部", "text=全部章节", "text=展开更多", "text=更多"]:
        try:
            btn = page.query_selector(selector)
            if btn and btn.is_visible():
                btn.click()
                page.wait_for_timeout(2000)
                break
        except Exception:
            continue

    # The chapter list lives in a Material-UI drawer; wait briefly for it.
    try:
        page.wait_for_selector(".MuiDrawer-paper", timeout=5000)
    except Exception:
        pass

    # Flip sort order ("点我改变排序") so chapters come out ascending.
    try:
        sort_btn = page.query_selector("text=点我改变排序")
        if sort_btn and sort_btn.is_visible():
            sort_btn.click()
            page.wait_for_timeout(2000)
    except Exception:
        pass

    # Read the advertised chapter count ("共N个章节") from the drawer, 0 if absent.
    total = page.evaluate("""
        () => {
            const spans = document.querySelectorAll('.MuiDrawer-paper span');
            for (const s of spans) {
                const m = s.textContent.match(/共(\\d+)个章节/);
                if (m) return parseInt(m[1]);
            }
            return 0;
        }
    """)

    # Keep clicking "加载更多" (load more) until we have `total` links,
    # the button disappears, or 50 attempts elapse.
    for _ in range(50):
        count = page.evaluate("document.querySelectorAll('.MuiDrawer-paper a[href*=\"/mangaread/\"]').length")
        if total and count >= total:
            break
        clicked = page.evaluate("""
            () => {
                const walker = document.createTreeWalker(
                    document.querySelector('.MuiDrawer-paper') || document.body, NodeFilter.SHOW_TEXT
                );
                while (walker.nextNode()) {
                    if (walker.currentNode.textContent.includes('加载更多')) {
                        let el = walker.currentNode.parentElement;
                        while (el && el.tagName !== 'LI') el = el.parentElement;
                        if (el) { el.click(); return true; }
                        walker.currentNode.parentElement.click();
                        return true;
                    }
                }
                return false;
            }
        """)
        if not clicked:
            break
        page.wait_for_timeout(1000)

    # Collect unique chapter links; the numeric path segment is the chapter id.
    # "开始阅读" ("start reading") links are navigation, not chapters.
    chapters = page.evaluate("""
        () => {
            const container = document.querySelector('.MuiDrawer-paper') || document;
            const links = container.querySelectorAll('a[href*="/mangaread/"]');
            const chapters = [], seen = new Set();
            links.forEach(a => {
                const match = a.getAttribute('href').match(/\\/mangaread\\/[^/]+\\/(\\d+)/);
                if (match && !seen.has(match[1])) {
                    seen.add(match[1]);
                    const name = a.textContent.trim();
                    if (name && name !== '开始阅读') chapters.push({ id: match[1], chapterName: name });
                }
            });
            return chapters;
        }
    """)

    # Close the drawer so it does not obscure later interactions.
    try:
        page.keyboard.press("Escape")
    except Exception:
        pass
    return chapters if chapters else None
|
|
|
|
|
|
# ── Happymh: metadata & cover ─────────────────────────────
|
|
|
|
|
|
def fetch_metadata(page):
    """Scrape title/author/genres/description/cover URL from the manga page.

    Parses the raw HTML with regexes (the page markup, not an API).  Returns
    a dict keyed "mg-url", "mg-title", "mg-author", "mg-genres",
    "mg-description", "mg-cover"; fields that fail to parse are omitted.
    """
    html_text = page.content()
    metadata = {"mg-url": page.url}
    m = re.search(r'<h2 class="mg-title">(.*?)</h2>', html_text)
    if m:
        metadata["mg-title"] = m.group(1).strip()
    m = re.search(r'<p class="mg-sub-title">.*?<a[^>]*>(.*?)</a>', html_text, re.DOTALL)
    if m:
        metadata["mg-author"] = m.group(1).strip()
    genre_matches = re.findall(r'<p class="mg-cate">.*?</p>', html_text, re.DOTALL)
    if genre_matches:
        # Genre names are the link texts inside the first mg-cate paragraph.
        metadata["mg-genres"] = re.findall(r'<a[^>]*>(.*?)</a>', genre_matches[0])
    m = re.search(r'<div class="mg-desc">.*?<p[^>]*>(.*?)</p>', html_text, re.DOTALL)
    if m:
        metadata["mg-description"] = m.group(1).strip()
    if not metadata.get("mg-description"):
        # Fallback: some pages render the description inside <mip-showmore>.
        m = re.search(r'<mip-showmore[^>]*>(.*?)</mip-showmore>', html_text, re.DOTALL)
        if m:
            desc = re.sub(r'<[^>]+>', '', m.group(1)).strip()  # strip inner tags
            if desc:
                metadata["mg-description"] = desc
    # Cover URL: prefer og:image, else known cover <img> selectors.
    cover_url = page.evaluate("""
        () => {
            const og = document.querySelector('meta[property="og:image"]');
            if (og) return og.content;
            for (const sel of ['img.mg-cover', 'img[src*="mcover"]']) {
                const img = document.querySelector(sel);
                if (img && img.src) return img.src;
            }
            return null;
        }
    """)
    if cover_url:
        metadata["mg-cover"] = cover_url
    return metadata
|
|
|
|
|
|
# ── Happymh: image download ───────────────────────────────
|
|
|
|
|
|
def _try_get_chapter_images(session, slug, chapter_id):
    """Single attempt to get chapter images. Returns (images, api_status).

    Installs a response listener, navigates to the reader page, and captures
    the /apis/manga/reading JSON for exactly this chapter (the site also
    prefetches the next chapter, which must be ignored).  Falls back to
    scraping <img> tags from the DOM when interception yields nothing.
    Each image is {"url": str, "no_referrer": bool}.
    """
    captured_images = []
    # Mutable dict so the listener closure can report back to this scope.
    api_info = {"found": False, "status": None, "error": None}

    def on_response(response):
        if "/apis/manga/reading" not in response.url:
            return
        # Only capture our chapter, skip prefetched ones
        if f"cid={chapter_id}" not in response.url and f"cid%3D{chapter_id}" not in response.url:
            return
        # Ignore if we already captured images (prevent duplicate/prefetch)
        if captured_images:
            return
        api_info["found"] = True
        api_info["status"] = response.status
        if response.status != 200:
            api_info["error"] = f"status {response.status}"
            return
        try:
            data = response.json()
            # Verify chapter ID in response body
            resp_cid = str(data.get("data", {}).get("id", ""))
            if resp_cid and resp_cid != str(chapter_id):
                return
            scans = data.get("data", {}).get("scans", [])
            if isinstance(scans, str):
                # Some responses deliver scans as a JSON-encoded string.
                scans = json.loads(scans)
            for scan in scans:
                if isinstance(scan, dict) and "url" in scan:
                    captured_images.append({
                        "url": scan["url"],
                        # r != 0 marks hosts that reject referred requests.
                        "no_referrer": scan.get("r", 0) != 0,
                    })
        except Exception as e:
            api_info["error"] = str(e)

    page = session.page
    page.on("response", on_response)
    reader_url = f"{BASE_URL}/mangaread/{slug}/{chapter_id}"
    print(" Loading reader...")
    try:
        # In-page navigation (vs page.goto) keeps the listener attached.
        page.evaluate(f"window.location.href = '{reader_url}'")
    except Exception:
        pass
    hide_chrome()

    time.sleep(2)
    try:
        # Neutralize window.close so the site cannot close our tab.
        page.evaluate("window.close = () => {}")
    except Exception:
        pass

    print(" Waiting for page...")
    if not wait_for_cloudflare(session, timeout=90):
        page = session.page
        try:
            page.remove_listener("response", on_response)
        except Exception:
            pass
        return [], api_info

    page = session.page
    print(" Waiting for API...")
    # Poll up to 20s for the listener to fill captured_images.
    deadline = time.time() + 20
    while time.time() < deadline:
        if captured_images:
            break
        try:
            page.wait_for_timeout(500)
        except Exception:
            break

    try:
        page.remove_listener("response", on_response)
    except Exception:
        pass

    if not api_info["found"]:
        print(" API not intercepted")
    elif api_info["error"]:
        print(f" API: {api_info['error']}")

    # Filter out next-chapter preview images by counting DOM containers
    if captured_images:
        try:
            counts = page.evaluate("""
                () => {
                    const all = document.querySelectorAll('[class*="imgContainer"]').length;
                    const next = document.querySelectorAll('[class*="imgNext"]').length;
                    return { all, next, current: all - next };
                }
            """)
            if counts and counts.get("next", 0) > 0:
                actual = counts["current"]
                # Only trim when the container count is plausible.
                if 0 < actual < len(captured_images):
                    captured_images = captured_images[:actual]
        except Exception:
            pass

    # DOM fallback
    if not captured_images:
        try:
            page.wait_for_timeout(3000)
            # Collect page <img> sources, excluding next-chapter prefetch,
            # covers, Cloudflare assets and SVG icons.
            dom_images = page.evaluate("""
                () => {
                    const imgs = document.querySelectorAll('img[src*="http"]');
                    const nextImgs = new Set(
                        Array.from(document.querySelectorAll('[class*="imgNext"] img'))
                            .map(img => img.src)
                    );
                    const urls = [], seen = new Set();
                    imgs.forEach(img => {
                        const src = img.src || '';
                        if (src && !seen.has(src) && !nextImgs.has(src)
                            && !src.includes('/mcover/')
                            && !src.includes('cloudflare') && !src.includes('.svg')) {
                            seen.add(src); urls.push(src);
                        }
                    });
                    return urls;
                }
            """)
            if dom_images:
                print(f" DOM: {len(dom_images)} images")
                for u in dom_images:
                    captured_images.append({"url": u, "no_referrer": False})
        except Exception as e:
            print(f" DOM failed: {e}")

    return captured_images, api_info
|
|
|
|
|
|
def get_chapter_images(session, slug, chapter_id):
    """Get chapter images. On API 403 (CF expired), navigate to solve and retry.

    Wraps _try_get_chapter_images with a single retry: a 403 from the reading
    API means the Cloudflare clearance lapsed, so re-navigate, let the user
    solve the challenge, and attempt once more.  Returns the image list
    (possibly empty).
    """
    images, api_info = _try_get_chapter_images(session, slug, chapter_id)
    if images:
        return images

    if api_info.get("status") == 403:
        print(" CF expired — solve in browser...")
        page = session.page
        try:
            # wait_until="commit" returns as soon as navigation starts;
            # wait_for_cloudflare handles the rest.
            page.goto(f"{BASE_URL}/mangaread/{slug}/{chapter_id}", wait_until="commit", timeout=60000)
        except Exception:
            pass
        if wait_for_cloudflare(session, timeout=120):
            images, _ = _try_get_chapter_images(session, slug, chapter_id)

    return images
|
|
|
|
|
|
def fetch_image_bytes(session, img):
    """Fetch image via browser network stack, return raw bytes or None.

    Triggers an in-page fetch() (so CF cookies and browser headers apply)
    and reads the matching network response.  Bodies of 100 bytes or fewer
    are treated as failures (likely error pages).
    """
    page = session.page
    url = img["url"]
    # Hosts flagged no_referrer reject referred requests — strip the referrer.
    ref_policy = "no-referrer" if img.get("no_referrer") else "origin"
    try:
        with page.expect_response(lambda r: url in r.url, timeout=15000) as resp_info:
            page.evaluate("([u, r]) => fetch(u, { referrerPolicy: r })", [url, ref_policy])
        response = resp_info.value
        if response.status == 200:
            body = response.body()
            if body and len(body) > 100:
                return body
    except Exception as e:
        # Log only the first error ever (function attribute as a one-shot flag)
        # to avoid flooding the per-page progress output.
        if not hasattr(fetch_image_bytes, "_err_logged"):
            fetch_image_bytes._err_logged = True
            print(f"\n First error: {e}")
    return None
|
|
|
|
|
|
def download_image(session, img, save_path):
    """Fetch one image and write it to save_path; True on success or cache hit."""
    if save_path.exists():
        # Already downloaded on a previous run — nothing to do.
        return True
    data = fetch_image_bytes(session, img)
    if not data:
        return False
    save_path.parent.mkdir(parents=True, exist_ok=True)
    save_path.write_bytes(data)
    return True
|
|
|
|
|
|
# ── R2 / Upload ────────────────────────────────────────────
|
|
|
|
|
|
WEBP_QUALITY = 75


def _to_webp_bytes(img, quality=WEBP_QUALITY, method=6):
    """Encode a PIL image as WebP (method=6 = slowest/best compression)."""
    out = io.BytesIO()
    img.save(out, format="WEBP", quality=quality, method=method)
    return out.getvalue()
|
|
|
|
|
|
def convert_to_webp(source, quality=WEBP_QUALITY):
    """Open an image (path or file-like) and return WebP-encoded bytes."""
    image = Image.open(source)
    return _to_webp_bytes(image, quality)
|
|
|
|
|
|
def make_cover(source, width=400, height=560):
    """Crop and resize an image into a width x height WebP cover (quality 80).

    Images wider than the target ratio are center-cropped horizontally;
    taller ones keep the top portion and drop the bottom.
    """
    img = Image.open(source)
    target_ratio = width / height
    if img.width / img.height > target_ratio:
        # Too wide: trim equal slices off both sides.
        crop_w = int(img.height * target_ratio)
        x0 = (img.width - crop_w) // 2
        img = img.crop((x0, 0, x0 + crop_w, img.height))
    else:
        # Too tall: keep the top, cut the bottom.
        crop_h = int(img.width / target_ratio)
        img = img.crop((0, 0, img.width, crop_h))
    resized = img.resize((width, height), Image.LANCZOS)
    return _to_webp_bytes(resized, quality=80)
|
|
|
|
|
|
def upload_to_r2(key, data, content_type="image/webp"):
    """Upload bytes to the R2 bucket under `key`; return the public URL."""
    s3.put_object(Bucket=BUCKET, Key=key, Body=data, ContentType=content_type)
    return f"{PUBLIC_URL}/{key}"
|
|
|
|
|
|
def r2_key_exists(key):
    """Return True when `key` already exists in the bucket (HEAD request)."""
    try:
        s3.head_object(Bucket=BUCKET, Key=key)
    except s3.exceptions.ClientError:
        # head_object raises ClientError (404) for missing keys.
        return False
    return True
|
|
|
|
|
|
def get_db():
    """Open a Postgres connection with UTF-8 client encoding."""
    connection = psycopg2.connect(DATABASE_URL)
    connection.set_client_encoding("UTF8")
    return connection
|
|
|
|
|
|
def parse_chapter_dir(dir_name):
    """Split a '<number> <title>' directory name into (number, title).

    Returns (0, dir_name) when the name does not start with a number —
    callers use 0 to mean "not a chapter directory".
    """
    parsed = re.match(r"^(\d+)\s+(.+)$", dir_name)
    if parsed is None:
        return 0, dir_name
    return int(parsed.group(1)), parsed.group(2)
|
|
|
|
|
|
# ── Helpers ────────────────────────────────────────────────
|
|
|
|
|
|
def load_manga_urls():
    """Read the manga URL list from manga.json; [] when missing or not a list."""
    if not MANGA_JSON.exists():
        return []
    parsed = json.loads(MANGA_JSON.read_text(encoding="utf-8"))
    if isinstance(parsed, list):
        return parsed
    return []
|
|
|
|
|
|
def slug_from_url(url):
    """Return the last path segment of a URL (the manga slug)."""
    path = urlparse(url).path
    return path.strip("/").split("/")[-1]
|
|
|
|
|
|
def get_existing_chapters(manga_dir):
    """Return names of subdirectories that hold at least one .jpg page."""
    if not manga_dir.exists():
        return set()
    return {
        entry.name
        for entry in manga_dir.iterdir()
        if entry.is_dir() and any(entry.glob("*.jpg"))
    }
|
|
|
|
|
|
def list_local_manga():
    """Sorted names of non-hidden directories under manga-content/."""
    if not CONTENT_DIR.exists():
        return []
    names = [
        entry.name
        for entry in CONTENT_DIR.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    ]
    return sorted(names)
|
|
|
|
|
|
# ── Core: download manga ──────────────────────────────────
|
|
|
|
|
|
def load_manga_page(session, slug):
    """Navigate to manga page, pass CF, return (chapters, metadata, cover_bytes) or None.

    The cover image is captured passively from the page's own network traffic
    (any 200 response whose URL contains /mcover/) so no extra fetch is needed.
    Returns None only when the Cloudflare challenge never resolves.
    """
    cover_responses = {}  # url -> raw body, filled by the listener below

    def on_cover(response):
        if "/mcover/" in response.url and response.status == 200:
            try:
                cover_responses[response.url] = response.body()
            except Exception:
                pass  # body may be unavailable if the response was evicted

    page = session.page
    page.on("response", on_cover)

    print(" Loading manga page...")
    try:
        page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
    except Exception:
        pass
    hide_chrome()
    if not wait_for_cloudflare(session):
        page = session.page
        try:
            page.remove_listener("response", on_cover)
        except Exception:
            pass
        return None

    page = session.page  # may have changed after CF restart
    print(" Fetching chapters...")
    chapters = fetch_chapters_via_api(page, slug)
    if not chapters:
        print(" API failed, trying DOM...")
        chapters = fetch_chapters_from_dom(page)

    metadata = fetch_metadata(page)

    # Wait for cover image to be present in DOM (up to 8s)
    cover_url = None
    for _ in range(16):
        cover_url = page.evaluate("""
            () => {
                const sels = ['img.mip-fill-content[src*="mcover"]', 'img[src*="/mcover/"]'];
                for (const s of sels) {
                    const img = document.querySelector(s);
                    if (img && img.src) return img.src;
                }
                return null;
            }
        """)
        if cover_url:
            break
        page.wait_for_timeout(500)

    # Give the response another moment to be captured
    if cover_url and cover_url not in cover_responses:
        page.wait_for_timeout(1500)

    try:
        page.remove_listener("response", on_cover)
    except Exception:
        pass

    cover_body = None
    if cover_url:
        cover_body = cover_responses.get(cover_url)
        if not cover_body:
            # Fuzzy match: compare URLs with query strings stripped in either direction.
            for url, data in cover_responses.items():
                if cover_url.split("?")[0] in url or url.split("?")[0] in cover_url:
                    cover_body = data
                    break

    if not cover_body:
        if cover_url:
            print(f" Cover URL found but body not captured ({len(cover_responses)} responses)")
        else:
            print(f" No cover URL found in DOM")

    return chapters, metadata, cover_body
|
|
|
|
|
|
def save_manga_local(slug, metadata, cover_body):
    """Persist metadata (merged into detail.json) and the cover image locally."""
    manga_dir = CONTENT_DIR / slug
    manga_dir.mkdir(parents=True, exist_ok=True)

    detail_path = manga_dir / "detail.json"
    if metadata:
        # Merge new metadata over whatever detail.json already holds.
        merged = {}
        if detail_path.exists():
            try:
                merged = json.loads(detail_path.read_text(encoding="utf-8"))
            except json.JSONDecodeError:
                pass
        merged.update(metadata)
        detail_path.write_text(json.dumps(merged, ensure_ascii=False, indent=4), encoding="utf-8")

    cover_path = manga_dir / "cover.jpg"
    # Never overwrite an existing cover; skip bodies too small to be real images.
    if not cover_path.exists() and cover_body and len(cover_body) > 100:
        cover_path.write_bytes(cover_body)
        print(f" Cover saved ({len(cover_body)} bytes)")
|
|
|
|
|
|
def download_chapter(session, slug, chapter_index, chapter, manga_dir):
    """Download a single chapter's images. Returns True if successful.

    Saves pages as "<chapter_index> <chapterName>/<n>.jpg".  Failed pages get
    one retry pass; a chapter where nothing downloaded has its (empty)
    directory removed so it is not mistaken for a finished chapter.
    """
    ch_id = chapter["id"]
    ch_name = chapter["chapterName"]
    folder_name = f"{chapter_index} {ch_name}"
    chapter_dir = manga_dir / folder_name

    images = get_chapter_images(session, slug, ch_id)
    if not images:
        print(f" No images")
        return False

    print(f" {len(images)} pages")
    chapter_dir.mkdir(parents=True, exist_ok=True)

    ok = 0
    failed = []  # (page_number, image) pairs for the retry pass
    for pn, img in enumerate(images, 1):
        save_path = chapter_dir / f"{pn}.jpg"
        if download_image(session, img, save_path):
            ok += 1
            print(f" {pn}/{len(images)}", end="\r")
        else:
            failed.append((pn, img))
        time.sleep(0.1)  # small throttle between page fetches

    # One retry pass over the failures, more slowly.
    if failed:
        time.sleep(1)
        for pn, img in failed:
            save_path = chapter_dir / f"{pn}.jpg"
            if download_image(session, img, save_path):
                ok += 1
            else:
                print(f" {pn}/{len(images)} FAIL")
            time.sleep(0.3)

    print(f" {ok}/{len(images)} downloaded" + " " * 20)

    if ok == 0:
        # Remove the empty directory so it doesn't count as "existing" later.
        try:
            chapter_dir.rmdir()
        except Exception:
            pass
        return False

    time.sleep(REQUEST_DELAY)
    return True
|
|
|
|
|
|
# ── Core: upload manga ────────────────────────────────────
|
|
|
|
|
|
def upload_manga_to_r2(manga_name, conn):
    """Upload a local manga to R2 and create DB records.

    Reads manga-content/<manga_name>/: detail.json for metadata, cover.jpg,
    and one "<number> <title>" directory per chapter.  Pages are converted to
    WebP and uploaded in parallel; Chapter/Page rows are inserted only AFTER
    the R2 upload succeeds, so the DB never references missing objects.
    """
    manga_path = CONTENT_DIR / manga_name
    detail_path = manga_path / "detail.json"

    if not detail_path.exists():
        print(f" Skipping {manga_name}: no detail.json")
        return

    detail = json.loads(detail_path.read_text(encoding="utf-8"))
    title = detail.get("mg-title", manga_name)
    slug = manga_name  # local directory name doubles as the slug
    genres = detail.get("mg-genres", [])
    description = detail.get("mg-description", "")
    if not description and genres:
        description = f"Genres: {', '.join(genres)}"
    genre = genres[0] if genres else "Drama"  # DB wants a single primary genre

    cur = conn.cursor()

    # Cover
    cover_file = manga_path / "cover.jpg"
    cover_url = ""
    cover_key = f"manga/{slug}/cover.webp"
    if cover_file.exists():
        if not r2_key_exists(cover_key):
            cover_url = upload_to_r2(cover_key, make_cover(cover_file))
            print(f" Cover uploaded")
        else:
            cover_url = f"{PUBLIC_URL}/{cover_key}"

    # Manga record: update cover on an existing row, else insert a new one.
    cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
    row = cur.fetchone()
    if row:
        manga_id, existing_cover = row
        if cover_url and cover_url != existing_cover:
            cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id))
            conn.commit()
    else:
        cur.execute(
            'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
            "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
            (title, description, cover_url, slug, genre),
        )
        manga_id = cur.fetchone()[0]
        conn.commit()
        print(f" Created manga (id: {manga_id})")

    # Chapters, sorted by their numeric prefix.
    chapter_dirs = sorted(
        [d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")],
        key=lambda d: parse_chapter_dir(d.name)[0],
    )

    for chapter_dir in chapter_dirs:
        order_num, chapter_title = parse_chapter_dir(chapter_dir.name)
        if order_num == 0:
            # Not a "<number> <title>" directory — not a chapter.
            continue

        # Idempotency: skip chapters already recorded in the DB.
        cur.execute('SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s', (manga_id, order_num))
        if cur.fetchone():
            print(f" [{order_num}] {chapter_title} — skip")
            continue

        # Page files sorted by the first number in their stem (1.jpg, 2.jpg, ...).
        page_files = sorted(
            [f for f in chapter_dir.iterdir() if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")],
            key=lambda f: int(re.search(r"(\d+)", f.stem).group(1)) if re.search(r"(\d+)", f.stem) else 0,
        )
        if not page_files:
            continue

        print(f" [{order_num}] {chapter_title} ({len(page_files)} pages)")

        # Upload to R2 first
        # (defaults bind slug/order_num now, avoiding late-binding surprises)
        def process_page(args, _slug=slug, _order=order_num):
            j, pf = args
            r2_key = f"manga/{_slug}/chapters/{_order}/{j}.webp"
            if not r2_key_exists(r2_key):
                return j, upload_to_r2(r2_key, convert_to_webp(pf))
            return j, f"{PUBLIC_URL}/{r2_key}"

        page_urls = {}
        done = 0
        with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
            futures = {pool.submit(process_page, (j, f)): j for j, f in enumerate(page_files, 1)}
            for future in as_completed(futures):
                j, url = future.result()
                page_urls[j] = url
                done += 1
                print(f" {done}/{len(page_files)}", end="\r")

        if not page_urls:
            print(f" Upload failed, skip")
            continue

        # DB records only after R2 upload succeeds
        cur.execute(
            'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
            (manga_id, order_num, chapter_title),
        )
        chapter_id = cur.fetchone()[0]
        for j in sorted(page_urls):
            cur.execute('INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)', (chapter_id, j, page_urls[j]))
        conn.commit()
        print(f" {len(page_files)} pages uploaded" + " " * 10)
|
|
|
|
|
|
# ── Commands ───────────────────────────────────────────────
|
|
|
|
|
|
def cmd_setup():
    """Interactive one-time setup: open Chrome visibly so the user can solve
    the Cloudflare challenge, then verify the cf_clearance cookie exists.

    Unlike other commands, Chrome stays visible here (setup mode).
    """
    print("\n Chrome will open. Solve Cloudflare on:")
    print(" 1. m.happymh.com")
    print(" 2. Any manga page")
    print(" 3. Any reader page\n")

    chrome_proc = launch_chrome(BASE_URL)
    # launch_chrome returns None both on failure and when Chrome already runs;
    # the open port disambiguates the two cases.
    if not chrome_proc and not is_port_open(CDP_PORT):
        print(" Failed to launch Chrome")
        return

    input(" Press ENTER when done... ")

    # Connect briefly over CDP just to check for the cf_clearance cookie.
    try:
        with sync_playwright() as p:
            browser = p.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
            cookies = browser.contexts[0].cookies()
            cf = [c for c in cookies if c["name"] == "cf_clearance"]
            print(f" cf_clearance: {'found' if cf else 'NOT found'}")
            browser.close()
    except Exception as e:
        print(f" Could not verify: {e}")

    # Only terminate Chrome if this command started it.
    if chrome_proc:
        chrome_proc.terminate()
    print()
|
|
|
|
|
|
def cmd_download(manga_url=None, chapter_set=None):
    """Download manga. chapter_set is a set of 1-based indices, or None for all.

    With no manga_url, processes every URL from manga.json.  Chapters whose
    name already appears in a downloaded directory are skipped.  Errors on
    one manga are logged and do not abort the rest.
    """
    urls = [manga_url] if manga_url else load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return

    print(f"\n Downloading {len(urls)} manga(s)...\n")

    def run(session):
        for url in urls:
            slug = slug_from_url(url)
            try:
                result = load_manga_page(session, slug)
                if not result:
                    continue  # CF never resolved for this manga
                chapters, metadata, cover_body = result
                if not chapters:
                    print(" No chapters found.")
                    continue
                print(f" Found {len(chapters)} chapters")
                save_manga_local(slug, metadata, cover_body)

                existing = get_existing_chapters(CONTENT_DIR / slug)

                for i, ch in enumerate(chapters, 1):
                    # Honor an explicit chapter selection when given.
                    if chapter_set and i not in chapter_set:
                        continue
                    # Skip chapters already downloaded (matched by name substring,
                    # since local folders are "<index> <name>").
                    if any(ch["chapterName"] in name for name in existing):
                        print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip")
                        continue
                    print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})")
                    download_chapter(session, slug, i, ch, CONTENT_DIR / slug)
                print(f"\n Done: {slug}")
            except Exception as e:
                # Keep going with the next manga; surface the traceback for debugging.
                print(f"\n Error: {url}: {e}")
                import traceback
                traceback.print_exc()

    with_browser(run)
    print("\nDownload complete!")
|
|
|
|
|
|
def cmd_upload(manga_name=None):
    """Upload one named manga — or everything under manga-content/ — to R2+DB."""
    if manga_name:
        names = [manga_name]
    else:
        names = list_local_manga()
        if not names:
            print(" No manga in manga-content/")
            return

    print(f"\n Uploading {len(names)} manga(s)...")
    conn = get_db()
    try:
        banner = "=" * 50
        for name in names:
            print(f"\n {banner}")
            print(f" {name}")
            print(f" {banner}")
            upload_manga_to_r2(name, conn)
    finally:
        # Always release the DB connection, even on a mid-upload error.
        conn.close()
    print("\nUpload complete!")
|
|
|
|
|
|
def cmd_sync(manga_url=None):
    """Sync: fetch latest chapters, stream directly to R2 (no local save).

    For each manga URL: load the site page, ensure a Manga row exists,
    diff the site's chapter list against the DB by chapter number, and for
    each missing chapter fetch its pages into RAM, convert to WebP, upload
    to R2 in parallel, and only then insert the Chapter/Page rows.
    """
    urls = [manga_url] if manga_url else load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return

    conn = get_db()

    def run(session):
        for url in urls:
            slug = slug_from_url(url)

            print(f"\n{'='*60}")
            print(f"Syncing: {slug}")
            print(f"{'='*60}")

            # 1. Load manga page + get chapters
            result = load_manga_page(session, slug)
            if not result:
                continue  # CF never resolved
            chapters, metadata, cover_body = result
            if not chapters:
                print(" No chapters found.")
                continue
            print(f" {len(chapters)} chapters on site")

            # 2. Ensure manga in DB
            cur = conn.cursor()
            title = metadata.get("mg-title", slug)
            genres = metadata.get("mg-genres", [])
            description = metadata.get("mg-description", "")
            genre = genres[0] if genres else "Drama"

            # Cover → R2 (from RAM)
            cover_url = ""
            cover_key = f"manga/{slug}/cover.webp"
            if cover_body and len(cover_body) > 100:
                if not r2_key_exists(cover_key):
                    cover_webp = make_cover(io.BytesIO(cover_body))
                    cover_url = upload_to_r2(cover_key, cover_webp)
                    print(f" Cover uploaded to R2")
                else:
                    cover_url = f"{PUBLIC_URL}/{cover_key}"

            cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (slug,))
            row = cur.fetchone()
            if row:
                manga_id = row[0]
                if cover_url:
                    cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id))
                    conn.commit()
            else:
                cur.execute(
                    'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
                    "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
                    (title, description, cover_url, slug, genre),
                )
                manga_id = cur.fetchone()[0]
                conn.commit()
                print(f" Created manga in DB (id: {manga_id})")

            # 3. Find chapters missing from DB
            # (chapter "number" in the DB is the 1-based site index)
            cur.execute('SELECT number FROM "Chapter" WHERE "mangaId" = %s', (manga_id,))
            existing_numbers = {row[0] for row in cur.fetchall()}

            new_count = 0
            for i, ch in enumerate(chapters, 1):
                ch_name = ch["chapterName"]
                if i in existing_numbers:
                    continue

                new_count += 1
                print(f" [{i}/{len(chapters)}] {ch_name} (id={ch['id']})")

                # Get image URLs from reader page
                images = get_chapter_images(session, slug, ch["id"])
                if not images:
                    print(f" No images")
                    continue

                print(f" {len(images)} pages")

                # Fetch each image into RAM, convert to WebP, upload to R2
                page_bytes = {}  # page_num -> raw bytes
                ok = 0
                for pn, img in enumerate(images, 1):
                    body = fetch_image_bytes(session, img)
                    if body:
                        page_bytes[pn] = body
                        ok += 1
                        print(f" Fetched {pn}/{len(images)}", end="\r")
                    else:
                        print(f" {pn}/{len(images)} FAIL")
                    time.sleep(0.1)

                if not page_bytes:
                    print(f" No images fetched, skip")
                    continue

                # Upload to R2 first
                # (defaults bind slug/i now, avoiding late-binding in the closure)
                def upload_page(args, _slug=slug, _i=i):
                    pn, raw = args
                    r2_key = f"manga/{_slug}/chapters/{_i}/{pn}.webp"
                    webp = convert_to_webp(io.BytesIO(raw))
                    return pn, upload_to_r2(r2_key, webp)

                page_urls = {}
                done = 0
                with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
                    futures = {pool.submit(upload_page, (pn, raw)): pn for pn, raw in page_bytes.items()}
                    for future in as_completed(futures):
                        pn, r2_url = future.result()
                        page_urls[pn] = r2_url
                        done += 1
                        print(f" R2: {done}/{len(page_bytes)}", end="\r")

                if not page_urls:
                    print(f" R2 upload failed, skip")
                    continue

                # Only create DB records after R2 upload succeeds
                cur.execute(
                    'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
                    (manga_id, i, ch_name),
                )
                chapter_id = cur.fetchone()[0]
                for pn in sorted(page_urls):
                    cur.execute('INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)', (chapter_id, pn, page_urls[pn]))
                conn.commit()
                print(f" {len(page_urls)} pages synced" + " " * 20)

                time.sleep(REQUEST_DELAY)

            if new_count == 0:
                print(" Already up to date!")
            else:
                print(f" Synced {new_count} new chapters")

    try:
        with_browser(run)
    finally:
        conn.close()

    print("\nSync complete!")
|
|
|
|
|
|
def r2_list_prefixes():
    """Return the sorted manga slugs stored in R2 (top-level prefixes under manga/)."""
    found = set()
    pages = s3.get_paginator("list_objects_v2").paginate(
        Bucket=BUCKET, Prefix="manga/", Delimiter="/"
    )
    for page in pages:
        for entry in page.get("CommonPrefixes", []):
            # entry["Prefix"] looks like "manga/<slug>/"; keep only the slug segment.
            name = entry["Prefix"].split("/")[1]
            if name:
                found.add(name)
    return sorted(found)
|
|
|
|
|
|
def r2_count_by_prefix(prefix):
    """Return the number of R2 objects whose key starts with *prefix*."""
    paginator = s3.get_paginator("list_objects_v2")
    return sum(
        len(page.get("Contents", []))
        for page in paginator.paginate(Bucket=BUCKET, Prefix=prefix)
    )
|
|
|
|
|
|
def r2_delete_prefix(prefix):
    """Delete every R2 object under *prefix* in parallel batches; return the count deleted."""
    # Collect one delete batch per listing page (a page holds at most 1000 keys,
    # which also fits the delete_objects request limit).
    batches = []
    for page in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix):
        contents = page.get("Contents", [])
        if contents:
            batches.append([{"Key": obj["Key"]} for obj in contents])

    def _drop(keys):
        # Bulk-delete one batch and report how many keys it held.
        s3.delete_objects(Bucket=BUCKET, Delete={"Objects": keys})
        return len(keys)

    total = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for n in pool.map(_drop, batches):
            total += n
            print(f" {total} deleted", end="\r")
    print(f" {total} objects deleted" + " " * 10)
    return total
|
|
|
|
|
|
# ── TUI ────────────────────────────────────────────────────
|
|
|
|
|
|
def tui_select(title, options, back=True, search=False):
    """Arrow-key menu. Returns selected index or -1."""
    entries = list(options)
    if back:
        # A trailing "[Back]" entry doubles as the cancel action.
        entries.append("[Back]")
    chosen = TerminalMenu(
        entries,
        title=title,
        search_key="/" if search else None,
        show_search_hint=search,
    ).show()
    if chosen is None:
        return -1  # Esc / Ctrl-C
    if back and chosen == len(entries) - 1:
        return -1  # "[Back]" picked
    return chosen
|
|
|
|
|
|
# slug -> display title memo, shared by get_manga_title() (and therefore
# manga_display_name()); populated lazily from detail.json or a batch DB load.
_title_cache = {}
|
|
|
|
def get_manga_title(slug):
    """Read manga title from detail.json or DB, fallback to slug.

    Lookup order: memoized ``_title_cache`` -> local ``detail.json`` ->
    one batch DB query that caches every known title. Returns the slug
    itself when no title can be found.
    """
    if slug in _title_cache:
        return _title_cache[slug]
    # Try local detail.json first
    detail_path = CONTENT_DIR / slug / "detail.json"
    if detail_path.exists():
        try:
            detail = json.loads(detail_path.read_text(encoding="utf-8"))
            title = detail.get("mg-title")
            if title:
                _title_cache[slug] = title
                return title
        except Exception:
            pass  # unreadable/invalid detail.json -> fall through to DB
    # Try database (batch load all titles so at most one query per miss)
    try:
        conn = get_db()
        try:
            cur = conn.cursor()
            cur.execute('SELECT slug, title FROM "Manga"')
            for row in cur.fetchall():
                _title_cache[row[0]] = row[1]
        finally:
            # Original leaked the connection when the query raised; always close.
            conn.close()
        if slug in _title_cache:
            return _title_cache[slug]
    except Exception:
        pass  # DB unavailable -> fall back to the slug
    return slug
|
|
|
|
|
|
def manga_display_name(slug):
    """Format: 'title (slug)' or just 'slug'."""
    title = get_manga_title(slug)
    return slug if title == slug else f"{title} ({slug})"
|
|
|
|
|
|
def tui_pick_manga_url(include_all=True):
    """Pick manga from manga.json. Shows title + slug."""
    urls = load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return None
    labels = [
        f"{n}. {manga_display_name(slug_from_url(url))}"
        for n, url in enumerate(urls, 1)
    ]
    if include_all:
        labels.insert(0, "All manga")
    choice = tui_select("Select manga (/ to search):", labels, search=True)
    if choice < 0:
        return None
    if include_all:
        # Index 0 is the "All manga" entry; real URLs are shifted by one.
        return "__all__" if choice == 0 else urls[choice - 1]
    return urls[choice]
|
|
|
|
|
|
def tui_pick_local(include_all=True):
    """Pick from local manga-content/. Shows title + slug."""
    local = list_local_manga()
    if not local:
        print(" No manga in manga-content/")
        return None
    labels = [f"{n}. {manga_display_name(name)}" for n, name in enumerate(local, 1)]
    if include_all:
        labels.insert(0, "All manga")
    choice = tui_select("Select manga (/ to search):", labels, search=True)
    if choice < 0:
        return None
    if include_all:
        # Index 0 is the "All manga" entry; real names are shifted by one.
        return "__all__" if choice == 0 else local[choice - 1]
    return local[choice]
|
|
|
|
|
|
def tui_pick_r2():
    """Pick manga from R2. Shows title + slug."""
    slugs = r2_list_prefixes()
    if not slugs:
        print(" R2 is empty")
        return None
    labels = [f"{n}. {manga_display_name(s)}" for n, s in enumerate(slugs, 1)]
    choice = tui_select("Select manga (/ to search):", labels, search=True)
    if choice < 0:
        return None
    return slugs[choice]
|
|
|
|
|
|
def tui_pick_chapters(chapters, slug=None):
    """Multi-select chapter picker. Space to toggle, Enter to confirm.

    Existing chapters shown grayed out. Returns a set of selected 1-based
    indices, None for "all chapters", or the string "back" on cancel.
    """
    # Check which chapters already exist locally
    existing = set()
    if slug:
        existing = get_existing_chapters(CONTENT_DIR / slug)

    # Compute the "already downloaded" flag once per chapter; the original
    # re-scanned `existing` twice per chapter (once to count, once to label).
    done_flags = [
        any(ch["chapterName"] in name for name in existing) for ch in chapters
    ]
    existing_count = sum(done_flags)

    idx = tui_select(f"{len(chapters)} chapters ({existing_count} downloaded)", [
        "All chapters (skip existing)",
        "Select chapters (space to toggle)",
    ])
    if idx == -1:
        return "back"
    if idx == 0:
        return None  # all

    items = []
    for i, (ch, done) in enumerate(zip(chapters, done_flags), 1):
        label = f"{i}. {ch['chapterName']}"
        if done:
            # Gray out already-downloaded chapters (ANSI bright-black).
            label = f"\033[90m{label} [done]\033[0m"
        items.append(label)

    menu = TerminalMenu(
        items,
        title="Space=toggle, Enter=confirm, /=search:",
        multi_select=True,
        show_multi_select_hint=True,
        search_key="/",
        show_search_hint=True,
    )
    selected = menu.show()
    if selected is None:
        return "back"
    if isinstance(selected, int):
        # simple-term-menu returns a bare int when exactly one entry is chosen
        selected = (selected,)
    return {i + 1 for i in selected}  # 1-based
|
|
|
|
|
|
def tui_download():
    """Interactive download: pick a manga, pick chapters, then run cmd_download."""
    picked = tui_pick_manga_url()
    if not picked:
        return
    if picked == "__all__":
        cmd_download()
        return

    slug = slug_from_url(picked)
    print(f"\n Fetching chapters for {slug}...")

    def get_chapters(session):
        # Load the manga page (ignoring navigation errors), wait out
        # Cloudflare, then pull the chapter list via the API.
        try:
            session.page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
        except Exception:
            pass
        if not wait_for_cloudflare(session):
            return None
        return fetch_chapters_via_api(session.page, slug)

    chapters = with_browser(get_chapters)
    if not chapters:
        print(" Could not get chapters")
        return

    result = tui_pick_chapters(chapters, slug=slug)
    if result != "back":
        cmd_download(picked, chapter_set=result)
|
|
|
|
|
|
def tui_upload():
    """Interactive upload: pick a local manga (or all) and run cmd_upload."""
    choice = tui_pick_local()
    if not choice:
        return
    if choice == "__all__":
        cmd_upload()
        return
    cmd_upload(choice)
|
|
|
|
|
|
def tui_sync():
    """Interactive sync: pick a manga URL (or all) and run cmd_sync."""
    choice = tui_pick_manga_url()
    if not choice:
        return
    if choice == "__all__":
        cmd_sync()
        return
    cmd_sync(choice)
|
|
|
|
|
|
def _r2_db_status():
    """Print per-manga object counts from R2 and row counts from the DB."""
    # Count R2 objects in single pass over the whole bucket.
    slug_counts = {}
    total = 0
    for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET):
        for obj in pg.get("Contents", []):
            total += 1
            parts = obj["Key"].split("/")
            if len(parts) >= 2 and parts[0] == "manga":
                slug_counts[parts[1]] = slug_counts.get(parts[1], 0) + 1
    print(f"\n R2: {total} objects, {len(slug_counts)} manga")
    for slug in sorted(slug_counts):
        print(f" {manga_display_name(slug)}: {slug_counts[slug]} objects")
    try:
        conn = get_db()
        try:
            cur = conn.cursor()
            cur.execute('SELECT COUNT(*) FROM "Manga"')
            mc = cur.fetchone()[0]
            cur.execute('SELECT COUNT(*) FROM "Chapter"')
            cc = cur.fetchone()[0]
            cur.execute('SELECT COUNT(*) FROM "Page"')
            pc = cur.fetchone()[0]
            print(f" DB: {mc} manga, {cc} chapters, {pc} pages")
        finally:
            # Original leaked the connection when a query raised; always close.
            conn.close()
    except Exception as e:
        print(f" DB: {e}")
    input("\n Press ENTER...")


def _db_delete_manga(slug):
    """Delete one manga (pages, chapters, manga row) from the DB by slug."""
    conn = get_db()
    try:
        cur = conn.cursor()
        cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (slug,))
        row = cur.fetchone()
        if row:
            mid = row[0]
            # Delete children first: Page -> Chapter -> Manga.
            cur.execute('DELETE FROM "Page" WHERE "chapterId" IN (SELECT id FROM "Chapter" WHERE "mangaId" = %s)', (mid,))
            cur.execute('DELETE FROM "Chapter" WHERE "mangaId" = %s', (mid,))
            cur.execute('DELETE FROM "Manga" WHERE id = %s', (mid,))
            conn.commit()
            print(f" Removed from R2 + DB")
    finally:
        # Original only closed inside `if row:` and not on errors; always close.
        conn.close()


def _db_clear_all():
    """Delete every row from Page, Chapter and Manga."""
    conn = get_db()
    try:
        cur = conn.cursor()
        for t in ['"Page"', '"Chapter"', '"Manga"']:
            cur.execute(f"DELETE FROM {t}")
        conn.commit()
    finally:
        conn.close()
    print(" All cleared")


def tui_r2_manage():
    """R2 / DB management submenu: status, delete one manga, or clear everything."""
    while True:
        idx = tui_select("R2 / DB Management", [
            "Status",
            "Delete specific manga",
            "Clear ALL (R2 + DB)",
        ])
        if idx == -1:
            break

        elif idx == 0:
            _r2_db_status()

        elif idx == 1:
            picked = tui_pick_r2()
            if not picked:
                continue
            confirm = input(f" Delete {picked} from R2 + DB? [y/N] ").strip().lower()
            if confirm == "y":
                r2_delete_prefix(f"manga/{picked}/")
                try:
                    _db_delete_manga(picked)
                except Exception as e:
                    print(f" DB error: {e}")

        elif idx == 2:
            confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower()
            if confirm == "y":
                # Empty prefix wipes the entire bucket, not just manga/ keys.
                r2_delete_prefix("")
                try:
                    _db_clear_all()
                except Exception as e:
                    print(f" DB error: {e}")
|
|
|
|
|
|
def main():
    """Top-level menu loop; dispatches to the per-command TUI handlers."""
    actions = {
        0: cmd_setup,
        1: tui_download,
        2: tui_upload,
        3: tui_sync,
        4: tui_r2_manage,
    }
    while True:
        idx = tui_select("Manga Toolkit", [
            "Setup (solve Cloudflare)",
            "Download",
            "Upload (local -> R2)",
            "Sync (site -> R2)",
            "R2 / DB management",
            "Quit",
        ], back=False)

        # Exit on Esc/None, cancel, or the explicit "Quit" entry (index 5).
        if idx is None or idx == -1 or idx == 5:
            break
        actions[idx]()

    print("Bye!")
|
|
|
|
|
|
# Entry point: launch the interactive TUI when run as a script.
if __name__ == "__main__":
    main()
|