sunnymh-manga-dl/manga.py
yiekheng e037996c5c Add ESC-to-stop, manga edit, R2 recompress, persistent browser session
- Persistent Chrome session: lazy-started, reused across all operations
  in one run (closed only on Quit). Eliminates per-command startup delay.
- ESC key gracefully stops Download/Upload/Sync after current chapter.
- Edit manga info: TUI form to update title/description/genre/status/cover
  in DB without re-syncing.
- R2 recompress: re-encode all webp images for a manga at quality 65 to
  reclaim storage. Skips files where new size is not smaller.
- Sync now refreshes title/description/genre on existing manga records,
  and saves all genres comma-separated (was only the first).
- Cover detection waits up to 8s for image to appear in DOM, with
  fallback selector.
- WebP encoding uses method=6 quality=75 for smaller files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 10:17:06 +08:00

1717 lines
57 KiB
Python

"""
Manga toolkit — download from m.happymh.com, upload to Cloudflare R2.
Usage:
python manga.py
"""
import io
import json
import os
import re
import select
import sys
import time
import socket
import subprocess
import termios
import threading
import tty
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from urllib.parse import urlparse
import boto3
import psycopg2
from PIL import Image
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright
from simple_term_menu import TerminalMenu
load_dotenv()
# ── Config ─────────────────────────────────────────────────
BASE_URL = "https://m.happymh.com"
ROOT_DIR = Path(__file__).parent
CONTENT_DIR = ROOT_DIR / "manga-content"   # local download destination
MANGA_JSON = ROOT_DIR / "manga.json"       # JSON list of manga URLs to process
BROWSER_DATA = ROOT_DIR / ".browser-data"  # persistent Chrome profile (keeps CF cookies)
CDP_PORT = 9333          # Chrome DevTools Protocol debugging port
REQUEST_DELAY = 1.5      # seconds to wait between chapters (politeness)
UPLOAD_WORKERS = 8       # thread-pool size for R2 uploads/deletes
CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
# R2
# S3-compatible client pointed at Cloudflare R2; credentials come from .env (load_dotenv above).
s3 = boto3.client(
    "s3",
    endpoint_url=f"https://{os.environ['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com",
    aws_access_key_id=os.environ["R2_ACCESS_KEY"],
    aws_secret_access_key=os.environ["R2_SECRET_KEY"],
    region_name="auto",
)
BUCKET = os.environ["R2_BUCKET"]
PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/")  # no trailing slash: keys are joined with "/"
DATABASE_URL = os.environ["DATABASE_URL"]
# ── ESC listener ───────────────────────────────────────────
class EscListener:
    """Context manager: listens for ESC key in background, sets self.stop event.

    Usage::

        with EscListener() as esc:
            for item in work:
                if esc.stop.is_set():
                    break

    On non-TTY stdin (pipes, CI) it degrades to a no-op listener: the
    context still works but ESC is never detected. On exit the terminal
    mode is restored and ``stop`` is always set.
    """
    def __init__(self):
        self.stop = threading.Event()   # set on ESC press or on __exit__
        self._thread = None             # background reader thread, if started
        self._old = None                # saved termios attrs to restore
        self._fd = None                 # stdin file descriptor
    def __enter__(self):
        if not sys.stdin.isatty():
            # Not an interactive terminal — nothing to listen on.
            return self
        self._fd = sys.stdin.fileno()
        try:
            self._old = termios.tcgetattr(self._fd)
            tty.setcbreak(self._fd)  # unbuffered reads so a lone ESC arrives immediately
        except Exception:
            self._old = None
            return self
        self._thread = threading.Thread(target=self._listen, daemon=True)
        self._thread.start()
        return self
    def _listen(self):
        # Poll stdin with a short timeout so the thread notices self.stop quickly.
        while not self.stop.is_set():
            try:
                r, _, _ = select.select([sys.stdin], [], [], 0.2)
                if r and sys.stdin.read(1) == "\x1b":
                    self.stop.set()
                    print("\n ESC pressed — stopping after current item...")
                    return
            except Exception:
                return
    def __exit__(self, *args):
        self.stop.set()
        # Wait for the listener to leave select()/read() before restoring the
        # terminal; otherwise it can race and consume a keystroke typed after
        # the mode switch back to cooked input.
        if self._thread is not None:
            self._thread.join(timeout=0.5)
        if self._old is not None:
            try:
                termios.tcsetattr(self._fd, termios.TCSADRAIN, self._old)
            except Exception:
                pass
# ── Chrome management ──────────────────────────────────────
def hide_chrome():
    """Hide the Chrome window on macOS via System Events (best-effort, async)."""
    script = 'tell application "System Events" to set visible of process "Google Chrome" to false'
    try:
        subprocess.Popen(
            ["osascript", "-e", script],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        pass
def is_port_open(port):
    """Return True when something accepts TCP connections on localhost:port."""
    probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        return probe.connect_ex(("localhost", port)) == 0
    finally:
        probe.close()
def launch_chrome(start_url=None):
    """Start Chrome with remote debugging enabled on CDP_PORT.

    Returns the Popen handle of the Chrome we launched, or None when
    Chrome is already listening on the port (reuse it), the binary is
    missing, or startup times out (~15s).
    """
    if is_port_open(CDP_PORT):
        # A debuggable Chrome is already running — caller will connect to it.
        return None
    if not Path(CHROME_PATH).exists():
        print(f" Chrome not found at: {CHROME_PATH}")
        return None
    cmd = [
        CHROME_PATH,
        f"--remote-debugging-port={CDP_PORT}",
        f"--user-data-dir={BROWSER_DATA}",  # persistent profile keeps CF cookies between runs
        "--no-first-run",
        "--no-default-browser-check",
        "--window-position=0,0",
        "--window-size=800,600",
        "--no-focus-on-navigate",
    ]
    if start_url:
        cmd.append(start_url)
    proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    # Poll for the CDP port; give Chrome up to ~15 seconds to come up.
    for _ in range(30):
        if is_port_open(CDP_PORT):
            time.sleep(1)  # let the first window finish opening before hiding it
            hide_chrome()
            return proc
        time.sleep(0.5)
    print(" Chrome failed to start")
    return None
class BrowserSession:
    """Manages Chrome + CDP lifecycle."""
    def __init__(self):
        # All populated by start(); kept separate so close() can tear down
        # whichever pieces actually exist.
        self.chrome_proc = None  # Popen of the Chrome we launched (None when reusing one)
        self.playwright = None   # sync_playwright driver handle
        self.browser = None      # Browser connected over CDP
        self.page = None         # the single Page all operations share
    def start(self):
        """Launch (or reuse) Chrome and attach Playwright over CDP."""
        self.chrome_proc = launch_chrome()
        self.playwright = sync_playwright().start()
        self.browser = self.playwright.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
        context = self.browser.contexts[0]
        # Reuse the existing tab if one is open, otherwise create one.
        self.page = context.pages[0] if context.pages else context.new_page()
    def close(self):
        """Best-effort teardown: CDP connection, Chrome process, driver."""
        try:
            self.browser.close()
        except Exception:
            pass
        if self.chrome_proc:
            self.chrome_proc.terminate()
        if self.playwright:
            self.playwright.stop()
_session_singleton = None

def get_session():
    """Get or lazy-start the global Chrome session.

    The singleton is published only after start() succeeds, so a failed
    startup does not leave a half-initialized session cached for every
    later call.
    """
    global _session_singleton
    if _session_singleton is None:
        session = BrowserSession()
        session.start()  # may raise — singleton stays None in that case
        _session_singleton = session
    return _session_singleton
def close_session():
    """Shut down the global Chrome session; safe to call when none exists."""
    global _session_singleton
    if _session_singleton is None:
        return
    _session_singleton.close()
    _session_singleton = None
def with_browser(func):
    """Invoke func with the shared persistent BrowserSession and return its result."""
    session = get_session()
    return func(session)
# ── Cloudflare ─────────────────────────────────────────────
def wait_for_cloudflare(session, timeout=120):
    """Wait for CF to resolve. User solves in the visible browser window.

    Polls the page title/url once per second for up to `timeout` seconds.
    Returns True once the real site is detected, False on timeout.
    """
    page = session.page
    for i in range(timeout):
        try:
            title = page.title()
        except Exception:
            # Page may be mid-navigation/reload — retry next second.
            time.sleep(1)
            continue
        if "Just a moment" in title or "challenge" in page.url:
            # Challenge interstitial is up; nag the user periodically.
            if i == 0:
                print(" CF challenge — solve in browser...")
            elif i % 15 == 0:
                print(f" Still waiting for CF... ({i}s)")
            time.sleep(1)
            continue
        if title and ("嗨皮漫画" in title or "happymh" in page.url):
            # Site title or URL confirms we're through the challenge.
            return True
        time.sleep(1)
    print(" CF timed out.")
    return False
# ── Happymh: chapter fetching ─────────────────────────────
def fetch_chapters_via_api(page, slug):
    """Fetch the chapter list via the site's paged JSON API.

    Runs fetch() inside the page so CF cookies/headers apply. Walks up to
    30 pages, stopping when the advertised total is reached. Returns a
    list of {id, chapterName} dicts, or None on error / empty result.
    """
    result = page.evaluate("""
        async (slug) => {
            const all = [];
            let total = 0;
            for (let p = 1; p <= 30; p++) {
                const url = `/v2.0/apis/manga/chapterByPage?code=${slug}&lang=cn&order=asc&page=${p}&_t=${Date.now()}`;
                try {
                    const ctrl = new AbortController();
                    setTimeout(() => ctrl.abort(), 10000);
                    const r = await fetch(url, { signal: ctrl.signal });
                    if (!r.ok) { if (p === 1) return { error: r.status }; break; }
                    const json = await r.json();
                    if (!json.data) break;
                    total = json.data.total || total;
                    let items = null;
                    for (const val of Object.values(json.data)) {
                        if (Array.isArray(val) && val.length > 0) { items = val; break; }
                    }
                    if (!items || items.length === 0) break;
                    for (const ch of items) {
                        all.push({ id: String(ch.id || ''), chapterName: ch.chapterName || ch.name || '' });
                    }
                    if (total && all.length >= total) break;
                } catch (e) {
                    if (p === 1) return { error: e.message };
                    break;
                }
            }
            return { chapters: all, total };
        }
    """, slug)
    if result and result.get("chapters") and len(result["chapters"]) > 0:
        chapters = result["chapters"]
        total = result.get("total", len(chapters))
        print(f" API: {len(chapters)}/{total} chapters")
        return chapters
    if result and result.get("error"):
        print(f" API error: {result['error']}")
    # Implicit None: caller falls back to DOM scraping.
    return None
def fetch_chapters_from_dom(page):
    """Scrape the chapter list from the rendered DOM (fallback for the API path).

    Opens the "all chapters" drawer, flips the sort order, clicks "load
    more" until the advertised total is present, then collects unique
    /mangaread/ links. Returns [{id, chapterName}] or None.
    """
    try:
        page.wait_for_selector("a[href*='/mangaread/']", timeout=15000)
        page.wait_for_timeout(1000)
    except Exception:
        return None
    # Open the chapter drawer — the button label varies across layouts.
    for selector in ["text=展开全部", "text=查看全部", "text=全部章节", "text=展开更多", "text=更多"]:
        try:
            btn = page.query_selector(selector)
            if btn and btn.is_visible():
                btn.click()
                page.wait_for_timeout(2000)
                break
        except Exception:
            continue
    try:
        page.wait_for_selector(".MuiDrawer-paper", timeout=5000)
    except Exception:
        pass
    # Toggle sort so chapters are listed in ascending order.
    try:
        sort_btn = page.query_selector("text=点我改变排序")
        if sort_btn and sort_btn.is_visible():
            sort_btn.click()
            page.wait_for_timeout(2000)
    except Exception:
        pass
    # Advertised chapter count ("共N个章节") — used as the load-more target.
    total = page.evaluate("""
        () => {
            const spans = document.querySelectorAll('.MuiDrawer-paper span');
            for (const s of spans) {
                const m = s.textContent.match(/共(\\d+)个章节/);
                if (m) return parseInt(m[1]);
            }
            return 0;
        }
    """)
    # Keep clicking the "加载更多" (load more) element until all chapters
    # are in the DOM, with a hard cap of 50 clicks.
    for _ in range(50):
        count = page.evaluate("document.querySelectorAll('.MuiDrawer-paper a[href*=\"/mangaread/\"]').length")
        if total and count >= total:
            break
        clicked = page.evaluate("""
            () => {
                const walker = document.createTreeWalker(
                    document.querySelector('.MuiDrawer-paper') || document.body, NodeFilter.SHOW_TEXT
                );
                while (walker.nextNode()) {
                    if (walker.currentNode.textContent.includes('加载更多')) {
                        let el = walker.currentNode.parentElement;
                        while (el && el.tagName !== 'LI') el = el.parentElement;
                        if (el) { el.click(); return true; }
                        walker.currentNode.parentElement.click();
                        return true;
                    }
                }
                return false;
            }
        """)
        if not clicked:
            break
        page.wait_for_timeout(1000)
    # Collect unique /mangaread/<slug>/<id> links, skipping the "开始阅读" CTA.
    chapters = page.evaluate("""
        () => {
            const container = document.querySelector('.MuiDrawer-paper') || document;
            const links = container.querySelectorAll('a[href*="/mangaread/"]');
            const chapters = [], seen = new Set();
            links.forEach(a => {
                const match = a.getAttribute('href').match(/\\/mangaread\\/[^/]+\\/(\\d+)/);
                if (match && !seen.has(match[1])) {
                    seen.add(match[1]);
                    const name = a.textContent.trim();
                    if (name && name !== '开始阅读') chapters.push({ id: match[1], chapterName: name });
                }
            });
            return chapters;
        }
    """)
    # Close the drawer so it doesn't cover later interactions.
    try:
        page.keyboard.press("Escape")
    except Exception:
        pass
    return chapters if chapters else None
# ── Happymh: metadata & cover ─────────────────────────────
def fetch_metadata(page):
    """Extract title/author/genres/description/cover URL from the manga page.

    Parses the raw HTML with regexes (keys follow the site's "mg-*"
    class names); only fields that were actually found are set.
    """
    html_text = page.content()
    metadata = {"mg-url": page.url}
    m = re.search(r'<h2 class="mg-title">(.*?)</h2>', html_text)
    if m:
        metadata["mg-title"] = m.group(1).strip()
    m = re.search(r'<p class="mg-sub-title">.*?<a[^>]*>(.*?)</a>', html_text, re.DOTALL)
    if m:
        metadata["mg-author"] = m.group(1).strip()
    genre_matches = re.findall(r'<p class="mg-cate">.*?</p>', html_text, re.DOTALL)
    if genre_matches:
        # All genre links inside the first category paragraph.
        metadata["mg-genres"] = re.findall(r'<a[^>]*>(.*?)</a>', genre_matches[0])
    m = re.search(r'<div class="mg-desc">.*?<p[^>]*>(.*?)</p>', html_text, re.DOTALL)
    if m:
        metadata["mg-description"] = m.group(1).strip()
    if not metadata.get("mg-description"):
        # Fallback: some layouts put the description in a <mip-showmore> widget.
        m = re.search(r'<mip-showmore[^>]*>(.*?)</mip-showmore>', html_text, re.DOTALL)
        if m:
            desc = re.sub(r'<[^>]+>', '', m.group(1)).strip()
            if desc:
                metadata["mg-description"] = desc
    # Cover URL: prefer og:image meta, then known cover <img> selectors.
    cover_url = page.evaluate("""
        () => {
            const og = document.querySelector('meta[property="og:image"]');
            if (og) return og.content;
            for (const sel of ['img.mg-cover', 'img[src*="mcover"]']) {
                const img = document.querySelector(sel);
                if (img && img.src) return img.src;
            }
            return null;
        }
    """)
    if cover_url:
        metadata["mg-cover"] = cover_url
    return metadata
# ── Happymh: image download ───────────────────────────────
def _try_get_chapter_images(session, slug, chapter_id):
    """Single attempt to get chapter images. Returns (images, api_info).

    Navigates to the reader page and intercepts the site's
    /apis/manga/reading response to read the scan URLs; falls back to
    scraping <img> tags from the DOM when interception yields nothing.
    `images` is a list of {"url", "no_referrer"} dicts; `api_info` is a
    {"found", "status", "error"} dict describing the intercepted call.
    """
    captured_images = []
    api_info = {"found": False, "status": None, "error": None}
    def on_response(response):
        # Network listener: fires for every response while the reader loads.
        if "/apis/manga/reading" not in response.url:
            return
        # Only capture our chapter, skip prefetched ones
        if f"cid={chapter_id}" not in response.url and f"cid%3D{chapter_id}" not in response.url:
            return
        # Ignore if we already captured images (prevent duplicate/prefetch)
        if captured_images:
            return
        api_info["found"] = True
        api_info["status"] = response.status
        if response.status != 200:
            api_info["error"] = f"status {response.status}"
            return
        try:
            data = response.json()
            # Verify chapter ID in response body
            resp_cid = str(data.get("data", {}).get("id", ""))
            if resp_cid and resp_cid != str(chapter_id):
                return
            scans = data.get("data", {}).get("scans", [])
            if isinstance(scans, str):
                # Some responses double-encode the scan list as a JSON string.
                scans = json.loads(scans)
            for scan in scans:
                if isinstance(scan, dict) and "url" in scan:
                    captured_images.append({
                        "url": scan["url"],
                        # "r" flag marks images that must be fetched without a referrer
                        "no_referrer": scan.get("r", 0) != 0,
                    })
        except Exception as e:
            api_info["error"] = str(e)
    page = session.page
    page.on("response", on_response)
    reader_url = f"{BASE_URL}/mangaread/{slug}/{chapter_id}"
    print(" Loading reader...")
    try:
        # In-page navigation (vs goto) keeps the response listener attached.
        page.evaluate(f"window.location.href = '{reader_url}'")
    except Exception:
        pass
    hide_chrome()
    time.sleep(2)
    try:
        # Neutralize the site's window.close() anti-devtools trick.
        page.evaluate("window.close = () => {}")
    except Exception:
        pass
    print(" Waiting for page...")
    if not wait_for_cloudflare(session, timeout=90):
        page = session.page
        try:
            page.remove_listener("response", on_response)
        except Exception:
            pass
        return [], api_info
    page = session.page
    print(" Waiting for API...")
    # Give the reading API up to 20s to be intercepted.
    deadline = time.time() + 20
    while time.time() < deadline:
        if captured_images:
            break
        try:
            page.wait_for_timeout(500)
        except Exception:
            break
    try:
        page.remove_listener("response", on_response)
    except Exception:
        pass
    if not api_info["found"]:
        print(" API not intercepted")
    elif api_info["error"]:
        print(f" API: {api_info['error']}")
    # Filter out next-chapter preview images by counting DOM containers
    if captured_images:
        try:
            counts = page.evaluate("""
                () => {
                    const all = document.querySelectorAll('[class*="imgContainer"]').length;
                    const next = document.querySelectorAll('[class*="imgNext"]').length;
                    return { all, next, current: all - next };
                }
            """)
            if counts and counts.get("next", 0) > 0:
                actual = counts["current"]
                if 0 < actual < len(captured_images):
                    captured_images = captured_images[:actual]
        except Exception:
            pass
    # DOM fallback
    if not captured_images:
        try:
            page.wait_for_timeout(3000)
            dom_images = page.evaluate("""
                () => {
                    const imgs = document.querySelectorAll('img[src*="http"]');
                    const nextImgs = new Set(
                        Array.from(document.querySelectorAll('[class*="imgNext"] img'))
                            .map(img => img.src)
                    );
                    const urls = [], seen = new Set();
                    imgs.forEach(img => {
                        const src = img.src || '';
                        if (src && !seen.has(src) && !nextImgs.has(src)
                            && !src.includes('/mcover/')
                            && !src.includes('cloudflare') && !src.includes('.svg')) {
                            seen.add(src); urls.push(src);
                        }
                    });
                    return urls;
                }
            """)
            if dom_images:
                print(f" DOM: {len(dom_images)} images")
                for u in dom_images:
                    captured_images.append({"url": u, "no_referrer": False})
        except Exception as e:
            print(f" DOM failed: {e}")
    return captured_images, api_info
def get_chapter_images(session, slug, chapter_id):
    """Get chapter images. On API 403 (CF expired), navigate to solve and retry."""
    images, api_info = _try_get_chapter_images(session, slug, chapter_id)
    if not images and api_info.get("status") == 403:
        print(" CF expired — solve in browser...")
        try:
            session.page.goto(
                f"{BASE_URL}/mangaread/{slug}/{chapter_id}",
                wait_until="commit",
                timeout=60000,
            )
        except Exception:
            pass
        # One retry once the user clears the challenge.
        if wait_for_cloudflare(session, timeout=120):
            images, _ = _try_get_chapter_images(session, slug, chapter_id)
    return images
def fetch_image_bytes(session, img):
    """Fetch image via browser network stack, return raw bytes or None.

    Issues fetch() from inside the page (so CF cookies and the required
    referrer policy apply) and captures the matching network response.
    Bodies of 100 bytes or fewer are treated as error placeholders.
    """
    page = session.page
    url = img["url"]
    ref_policy = "no-referrer" if img.get("no_referrer") else "origin"
    try:
        with page.expect_response(lambda r: url in r.url, timeout=15000) as resp_info:
            page.evaluate("([u, r]) => fetch(u, { referrerPolicy: r })", [url, ref_policy])
        response = resp_info.value
        if response.status == 200:
            body = response.body()
            if body and len(body) > 100:
                return body
    except Exception as e:
        # Log only the first failure (function attribute as one-shot flag)
        # so retries don't flood the progress output.
        if not hasattr(fetch_image_bytes, "_err_logged"):
            fetch_image_bytes._err_logged = True
            print(f"\n First error: {e}")
    return None
def download_image(session, img, save_path):
    """Fetch one image and persist it; True when the file exists on disk."""
    if save_path.exists():
        return True
    data = fetch_image_bytes(session, img)
    if not data:
        return False
    save_path.parent.mkdir(parents=True, exist_ok=True)
    save_path.write_bytes(data)
    return True
# ── R2 / Upload ────────────────────────────────────────────
WEBP_QUALITY = 75
def _to_webp_bytes(img, quality=WEBP_QUALITY, method=6):
    """Encode a PIL image as WebP (method=6: slowest/smallest) and return the bytes."""
    out = io.BytesIO()
    img.save(out, format="WEBP", quality=quality, method=method)
    return out.getvalue()
def convert_to_webp(source, quality=WEBP_QUALITY):
    """Open an image file/buffer and return its WebP-encoded bytes."""
    image = Image.open(source)
    return _to_webp_bytes(image, quality)
def make_cover(source, width=400, height=560):
    """Crop-and-resize an image to a width×height WebP cover (quality 80).

    Wider-than-target images are center-cropped horizontally; taller ones
    keep their top portion (covers usually have the art at the top).
    """
    img = Image.open(source)
    want_ratio = width / height
    have_ratio = img.width / img.height
    if have_ratio > want_ratio:
        # Too wide: trim equal amounts from both sides.
        crop_w = int(img.height * want_ratio)
        x0 = (img.width - crop_w) // 2
        img = img.crop((x0, 0, x0 + crop_w, img.height))
    else:
        # Too tall: keep the top of the image.
        crop_h = int(img.width / want_ratio)
        img = img.crop((0, 0, img.width, crop_h))
    resized = img.resize((width, height), Image.LANCZOS)
    return _to_webp_bytes(resized, quality=80)
def upload_to_r2(key, data, content_type="image/webp"):
    """PUT an object into the R2 bucket; returns its public URL."""
    public_url = f"{PUBLIC_URL}/{key}"
    s3.put_object(Bucket=BUCKET, Key=key, Body=data, ContentType=content_type)
    return public_url
def r2_key_exists(key):
    """Return True when `key` exists in the R2 bucket.

    Only a 404/NotFound from HEAD means "missing". Other client errors
    (bad credentials, throttling) are re-raised instead of being treated
    as a missing key — previously they silently triggered redundant
    re-uploads and masked configuration problems.
    """
    try:
        s3.head_object(Bucket=BUCKET, Key=key)
        return True
    except s3.exceptions.ClientError as e:
        code = e.response.get("Error", {}).get("Code", "")
        if code in ("404", "NoSuchKey", "NotFound"):
            return False
        raise
def get_db():
    """Open a Postgres connection with UTF8 client encoding."""
    connection = psycopg2.connect(DATABASE_URL)
    connection.set_client_encoding("UTF8")
    return connection
def parse_chapter_dir(dir_name):
    """Split a "<number> <title>" folder name into (number, title).

    Returns (0, dir_name) when the name has no leading number — callers
    use 0 to mean "not a chapter directory".
    """
    match = re.match(r"^(\d+)\s+(.+)$", dir_name)
    if not match:
        return 0, dir_name
    return int(match.group(1)), match.group(2)
# ── Helpers ────────────────────────────────────────────────
def load_manga_urls():
    """Read manga.json; returns [] when the file is missing or not a JSON list."""
    if not MANGA_JSON.exists():
        return []
    parsed = json.loads(MANGA_JSON.read_text(encoding="utf-8"))
    return parsed if isinstance(parsed, list) else []
def slug_from_url(url):
    """Return the last path segment of a URL, e.g. .../manga/abc -> "abc"."""
    path = urlparse(url).path
    return path.strip("/").rsplit("/", 1)[-1]
def get_existing_chapters(manga_dir):
    """Names of chapter subdirectories that already hold at least one .jpg."""
    if not manga_dir.exists():
        return set()
    return {
        entry.name
        for entry in manga_dir.iterdir()
        if entry.is_dir() and any(entry.glob("*.jpg"))
    }
def list_local_manga():
    """Sorted names of manga folders under manga-content/ (dot-dirs excluded)."""
    if not CONTENT_DIR.exists():
        return []
    names = [
        entry.name
        for entry in CONTENT_DIR.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    ]
    return sorted(names)
# ── Core: download manga ──────────────────────────────────
def load_manga_page(session, slug):
    """Navigate to manga page, pass CF, return (chapters, metadata, cover_bytes) or None.

    The cover image is captured passively from network responses while
    the page loads, so it can be reused later without a second request.
    Returns None only when the CF challenge never resolves.
    """
    cover_responses = {}  # cover URL -> raw response body bytes
    def on_cover(response):
        if "/mcover/" in response.url and response.status == 200:
            try:
                cover_responses[response.url] = response.body()
            except Exception:
                pass
    page = session.page
    page.on("response", on_cover)
    print(" Loading manga page...")
    try:
        page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
    except Exception:
        pass
    hide_chrome()
    if not wait_for_cloudflare(session):
        page = session.page
        try:
            page.remove_listener("response", on_cover)
        except Exception:
            pass
        return None
    page = session.page  # may have changed after CF restart
    print(" Fetching chapters...")
    chapters = fetch_chapters_via_api(page, slug)
    if not chapters:
        print(" API failed, trying DOM...")
        chapters = fetch_chapters_from_dom(page)
    metadata = fetch_metadata(page)
    # Wait for cover image to be present in DOM (up to 8s)
    cover_url = None
    for _ in range(16):
        cover_url = page.evaluate("""
            () => {
                const sels = ['img.mip-fill-content[src*="mcover"]', 'img[src*="/mcover/"]'];
                for (const s of sels) {
                    const img = document.querySelector(s);
                    if (img && img.src) return img.src;
                }
                return null;
            }
        """)
        if cover_url:
            break
        page.wait_for_timeout(500)
    # Give the response another moment to be captured
    if cover_url and cover_url not in cover_responses:
        page.wait_for_timeout(1500)
    try:
        page.remove_listener("response", on_cover)
    except Exception:
        pass
    cover_body = None
    if cover_url:
        cover_body = cover_responses.get(cover_url)
        if not cover_body:
            # Query-string-insensitive match against captured responses.
            for url, data in cover_responses.items():
                if cover_url.split("?")[0] in url or url.split("?")[0] in cover_url:
                    cover_body = data
                    break
    if not cover_body:
        if cover_url:
            print(f" Cover URL found but body not captured ({len(cover_responses)} responses)")
        else:
            print(f" No cover URL found in DOM")
    return chapters, metadata, cover_body
def save_manga_local(slug, metadata, cover_body):
    """Persist metadata (merged into detail.json) and cover under manga-content/<slug>/."""
    manga_dir = CONTENT_DIR / slug
    manga_dir.mkdir(parents=True, exist_ok=True)
    detail_path = manga_dir / "detail.json"
    if metadata:
        # Merge new fields over any previously saved detail.json.
        merged = {}
        if detail_path.exists():
            try:
                merged = json.loads(detail_path.read_text(encoding="utf-8"))
            except json.JSONDecodeError:
                pass
        merged.update(metadata)
        detail_path.write_text(
            json.dumps(merged, ensure_ascii=False, indent=4), encoding="utf-8"
        )
    cover_path = manga_dir / "cover.jpg"
    # Write the cover once; bodies <= 100 bytes are error placeholders.
    if cover_body and len(cover_body) > 100 and not cover_path.exists():
        cover_path.write_bytes(cover_body)
        print(f" Cover saved ({len(cover_body)} bytes)")
def download_chapter(session, slug, chapter_index, chapter, manga_dir):
    """Download a single chapter's images. Returns True if successful.

    Pages are saved as <manga_dir>/"<index> <name>"/<n>.jpg. Pages that
    fail get one retry pass; a chapter with zero saved pages has its
    (empty) directory removed and returns False.
    """
    ch_id = chapter["id"]
    ch_name = chapter["chapterName"]
    folder_name = f"{chapter_index} {ch_name}"
    chapter_dir = manga_dir / folder_name
    images = get_chapter_images(session, slug, ch_id)
    if not images:
        print(f" No images")
        return False
    print(f" {len(images)} pages")
    chapter_dir.mkdir(parents=True, exist_ok=True)
    ok = 0
    failed = []
    for pn, img in enumerate(images, 1):
        save_path = chapter_dir / f"{pn}.jpg"
        if download_image(session, img, save_path):
            ok += 1
            print(f" {pn}/{len(images)}", end="\r")
        else:
            failed.append((pn, img))
        time.sleep(0.1)
    # One retry pass for pages that failed the first time.
    if failed:
        time.sleep(1)
        for pn, img in failed:
            save_path = chapter_dir / f"{pn}.jpg"
            if download_image(session, img, save_path):
                ok += 1
            else:
                print(f" {pn}/{len(images)} FAIL")
            time.sleep(0.3)
    print(f" {ok}/{len(images)} downloaded" + " " * 20)
    if ok == 0:
        # Nothing saved — drop the empty chapter directory.
        try:
            chapter_dir.rmdir()
        except Exception:
            pass
        return False
    time.sleep(REQUEST_DELAY)
    return True
# ── Core: upload manga ────────────────────────────────────
def upload_manga_to_r2(manga_name, conn):
    """Upload a local manga to R2 and create DB records.

    Reads manga-content/<manga_name>/ (detail.json, cover.jpg, chapter
    folders named "<n> <title>"), uploads cover and pages as WebP, and
    inserts Manga/Chapter/Page rows. Chapters already present in the DB
    are skipped; DB rows are written only after the R2 upload succeeds.
    """
    manga_path = CONTENT_DIR / manga_name
    detail_path = manga_path / "detail.json"
    if not detail_path.exists():
        print(f" Skipping {manga_name}: no detail.json")
        return
    detail = json.loads(detail_path.read_text(encoding="utf-8"))
    title = detail.get("mg-title", manga_name)
    slug = manga_name
    genres = detail.get("mg-genres", [])
    description = detail.get("mg-description", "")
    genre = ", ".join(genres) if genres else "Drama"
    cur = conn.cursor()
    # Cover
    cover_file = manga_path / "cover.jpg"
    cover_url = ""
    cover_key = f"manga/{slug}/cover.webp"
    if cover_file.exists():
        if not r2_key_exists(cover_key):
            cover_url = upload_to_r2(cover_key, make_cover(cover_file))
            print(f" Cover uploaded")
        else:
            cover_url = f"{PUBLIC_URL}/{cover_key}"
    # Manga record
    cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
    row = cur.fetchone()
    if row:
        manga_id, existing_cover = row
        # Existing record: only refresh the cover URL when it changed.
        if cover_url and cover_url != existing_cover:
            cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id))
            conn.commit()
    else:
        cur.execute(
            'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
            "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
            (title, description, cover_url, slug, genre),
        )
        manga_id = cur.fetchone()[0]
        conn.commit()
        print(f" Created manga (id: {manga_id})")
    # Chapters, ordered by the numeric prefix of the folder name.
    chapter_dirs = sorted(
        [d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")],
        key=lambda d: parse_chapter_dir(d.name)[0],
    )
    for chapter_dir in chapter_dirs:
        order_num, chapter_title = parse_chapter_dir(chapter_dir.name)
        if order_num == 0:
            # No leading number — not a chapter directory.
            continue
        cur.execute('SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s', (manga_id, order_num))
        if cur.fetchone():
            print(f" [{order_num}] {chapter_title} — skip")
            continue
        # Page files ordered by the first number in each filename.
        page_files = sorted(
            [f for f in chapter_dir.iterdir() if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")],
            key=lambda f: int(re.search(r"(\d+)", f.stem).group(1)) if re.search(r"(\d+)", f.stem) else 0,
        )
        if not page_files:
            continue
        print(f" [{order_num}] {chapter_title} ({len(page_files)} pages)")
        # Upload to R2 first
        def process_page(args, _slug=slug, _order=order_num):
            # Default args pin slug/order at definition time (late-binding guard).
            j, pf = args
            r2_key = f"manga/{_slug}/chapters/{_order}/{j}.webp"
            if not r2_key_exists(r2_key):
                return j, upload_to_r2(r2_key, convert_to_webp(pf))
            return j, f"{PUBLIC_URL}/{r2_key}"
        page_urls = {}
        done = 0
        with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
            futures = {pool.submit(process_page, (j, f)): j for j, f in enumerate(page_files, 1)}
            for future in as_completed(futures):
                j, url = future.result()
                page_urls[j] = url
                done += 1
                print(f" {done}/{len(page_files)}", end="\r")
        if not page_urls:
            print(f" Upload failed, skip")
            continue
        # DB records only after R2 upload succeeds
        cur.execute(
            'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
            (manga_id, order_num, chapter_title),
        )
        chapter_id = cur.fetchone()[0]
        for j in sorted(page_urls):
            cur.execute('INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)', (chapter_id, j, page_urls[j]))
        conn.commit()
        print(f" {len(page_files)} pages uploaded" + " " * 10)
# ── Commands ───────────────────────────────────────────────
def cmd_setup():
    """Interactive first-run: open the site so the user can solve CF challenges.

    Brings Chrome to the foreground, waits for ENTER, then checks whether
    the cf_clearance cookie was actually issued.
    """
    print("\n Chrome will open. Solve Cloudflare on:")
    print(" 1. m.happymh.com")
    print(" 2. Any manga page")
    print(" 3. Any reader page\n")
    session = get_session()
    try:
        session.page.goto(BASE_URL, wait_until="commit", timeout=60000)
    except Exception:
        pass
    # Bring Chrome to front for setup
    try:
        subprocess.Popen(
            ["osascript", "-e", 'tell application "Google Chrome" to activate'],
            stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
        )
    except Exception:
        pass
    input(" Press ENTER when done... ")
    # Sanity-check: the CF clearance cookie proves the challenge was solved.
    cookies = session.browser.contexts[0].cookies()
    cf = [c for c in cookies if c["name"] == "cf_clearance"]
    print(f" cf_clearance: {'found' if cf else 'NOT found'}")
    hide_chrome()
    print()
def cmd_download(manga_url=None, chapter_set=None):
    """Download manga. chapter_set is a set of 1-based indices, or None for all.

    When manga_url is None, every URL from manga.json is processed.
    ESC stops after the current chapter finishes.
    """
    urls = [manga_url] if manga_url else load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return
    print(f"\n Downloading {len(urls)} manga(s)... (ESC to stop)\n")
    def run(session):
        with EscListener() as esc:
            for url in urls:
                if esc.stop.is_set():
                    break
                slug = slug_from_url(url)
                try:
                    result = load_manga_page(session, slug)
                    if not result:
                        continue
                    chapters, metadata, cover_body = result
                    if not chapters:
                        print(" No chapters found.")
                        continue
                    print(f" Found {len(chapters)} chapters")
                    save_manga_local(slug, metadata, cover_body)
                    existing = get_existing_chapters(CONTENT_DIR / slug)
                    for i, ch in enumerate(chapters, 1):
                        if esc.stop.is_set():
                            break
                        if chapter_set and i not in chapter_set:
                            continue
                        # Substring match against existing "<n> <name>" folders.
                        if any(ch["chapterName"] in name for name in existing):
                            print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip")
                            continue
                        print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})")
                        download_chapter(session, slug, i, ch, CONTENT_DIR / slug)
                    print(f"\n Done: {slug}")
                except Exception as e:
                    # One bad manga shouldn't abort the whole batch.
                    print(f"\n Error: {url}: {e}")
                    import traceback
                    traceback.print_exc()
    with_browser(run)
    print("\nDownload complete!")
def cmd_upload(manga_name=None):
    """Upload one named manga — or every local manga — to R2 and the DB.

    ESC stops between manga; the DB connection is always closed.
    """
    names = [manga_name] if manga_name else list_local_manga()
    if not names:
        print(" No manga in manga-content/")
        return
    print(f"\n Uploading {len(names)} manga(s)... (ESC to stop)")
    conn = get_db()
    try:
        with EscListener() as esc:
            for name in names:
                if esc.stop.is_set():
                    break
                banner = "=" * 50
                print(f"\n {banner}")
                print(f" {name}")
                print(f" {banner}")
                upload_manga_to_r2(name, conn)
    finally:
        conn.close()
    print("\nUpload complete!")
def cmd_sync(manga_url=None):
    """Sync: fetch latest chapters, stream directly to R2 (no local save).

    For each URL: load the manga page, upsert the Manga row (refreshing
    title/description/genre), then for every chapter number missing from
    the DB fetch its images into RAM, upload them to R2 as WebP, and only
    then insert Chapter/Page rows. ESC stops after the current chapter.
    """
    urls = [manga_url] if manga_url else load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return
    conn = get_db()
    def run(session):
        with EscListener() as esc:
            for url in urls:
                if esc.stop.is_set():
                    break
                slug = slug_from_url(url)
                print(f"\n{'='*60}")
                print(f"Syncing: {slug}")
                print(f"{'='*60}")
                # 1. Load manga page + get chapters
                result = load_manga_page(session, slug)
                if not result:
                    continue
                chapters, metadata, cover_body = result
                if not chapters:
                    print(" No chapters found.")
                    continue
                print(f" {len(chapters)} chapters on site")
                # 2. Ensure manga in DB
                cur = conn.cursor()
                title = metadata.get("mg-title", slug)
                genres = metadata.get("mg-genres", [])
                description = metadata.get("mg-description", "")
                genre = ", ".join(genres) if genres else "Drama"
                # Cover → R2 (from RAM)
                cover_url = ""
                cover_key = f"manga/{slug}/cover.webp"
                if cover_body and len(cover_body) > 100:
                    if not r2_key_exists(cover_key):
                        cover_webp = make_cover(io.BytesIO(cover_body))
                        cover_url = upload_to_r2(cover_key, cover_webp)
                        print(f" Cover uploaded to R2")
                    else:
                        cover_url = f"{PUBLIC_URL}/{cover_key}"
                cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (slug,))
                row = cur.fetchone()
                if row:
                    manga_id = row[0]
                    # Refresh metadata fields (cover only updated if we have a new one)
                    if cover_url:
                        cur.execute(
                            'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
                            '"coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                            (title, description, genre, cover_url, manga_id),
                        )
                    else:
                        cur.execute(
                            'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
                            '"updatedAt" = NOW() WHERE id = %s',
                            (title, description, genre, manga_id),
                        )
                    conn.commit()
                    print(f" Updated metadata (genre: {genre})")
                else:
                    cur.execute(
                        'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
                        "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
                        (title, description, cover_url, slug, genre),
                    )
                    manga_id = cur.fetchone()[0]
                    conn.commit()
                    print(f" Created manga in DB (id: {manga_id})")
                # 3. Find chapters missing from DB
                cur.execute('SELECT number FROM "Chapter" WHERE "mangaId" = %s', (manga_id,))
                existing_numbers = {row[0] for row in cur.fetchall()}
                new_count = 0
                for i, ch in enumerate(chapters, 1):
                    if esc.stop.is_set():
                        break
                    ch_name = ch["chapterName"]
                    if i in existing_numbers:
                        continue
                    new_count += 1
                    print(f" [{i}/{len(chapters)}] {ch_name} (id={ch['id']})")
                    # Get image URLs from reader page
                    images = get_chapter_images(session, slug, ch["id"])
                    if not images:
                        print(f" No images")
                        continue
                    print(f" {len(images)} pages")
                    # Fetch each image into RAM, convert to WebP, upload to R2
                    page_bytes = {}  # page_num -> raw bytes
                    ok = 0
                    for pn, img in enumerate(images, 1):
                        body = fetch_image_bytes(session, img)
                        if body:
                            page_bytes[pn] = body
                            ok += 1
                            print(f" Fetched {pn}/{len(images)}", end="\r")
                        else:
                            print(f" {pn}/{len(images)} FAIL")
                        time.sleep(0.1)
                    if not page_bytes:
                        print(f" No images fetched, skip")
                        continue
                    # Upload to R2 first
                    def upload_page(args, _slug=slug, _i=i):
                        # Default args pin slug/index at definition time (late-binding guard).
                        pn, raw = args
                        r2_key = f"manga/{_slug}/chapters/{_i}/{pn}.webp"
                        webp = convert_to_webp(io.BytesIO(raw))
                        return pn, upload_to_r2(r2_key, webp)
                    page_urls = {}
                    done = 0
                    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
                        futures = {pool.submit(upload_page, (pn, raw)): pn for pn, raw in page_bytes.items()}
                        for future in as_completed(futures):
                            pn, r2_url = future.result()
                            page_urls[pn] = r2_url
                            done += 1
                            print(f" R2: {done}/{len(page_bytes)}", end="\r")
                    if not page_urls:
                        print(f" R2 upload failed, skip")
                        continue
                    # Only create DB records after R2 upload succeeds
                    cur.execute(
                        'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
                        (manga_id, i, ch_name),
                    )
                    chapter_id = cur.fetchone()[0]
                    for pn in sorted(page_urls):
                        cur.execute('INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)', (chapter_id, pn, page_urls[pn]))
                    conn.commit()
                    print(f" {len(page_urls)} pages synced" + " " * 20)
                    time.sleep(REQUEST_DELAY)
                if new_count == 0:
                    print(" Already up to date!")
                else:
                    print(f" Synced {new_count} new chapters")
    try:
        with_browser(run)
    finally:
        conn.close()
    print("\nSync complete!")
def r2_list_prefixes():
    """List manga slugs in R2 by scanning top-level prefixes under manga/."""
    paginator = s3.get_paginator("list_objects_v2")
    found = set()
    for page in paginator.paginate(Bucket=BUCKET, Prefix="manga/", Delimiter="/"):
        for common in page.get("CommonPrefixes", []):
            # "manga/<slug>/" → "<slug>"
            name = common["Prefix"].split("/")[1]
            if name:
                found.add(name)
    return sorted(found)
def r2_count_by_prefix(prefix):
    """Count objects stored under a key prefix."""
    paginator = s3.get_paginator("list_objects_v2")
    return sum(
        len(page.get("Contents", []))
        for page in paginator.paginate(Bucket=BUCKET, Prefix=prefix)
    )
def r2_delete_prefix(prefix):
    """Delete all objects under a prefix."""
    # Collect keys page-by-page; each listing page (<=1000 keys) becomes one
    # delete_objects batch, which is the API's maximum batch size.
    key_batches = []
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=BUCKET, Prefix=prefix):
        contents = page.get("Contents", [])
        if contents:
            key_batches.append([{"Key": item["Key"]} for item in contents])

    def _delete_batch(batch):
        s3.delete_objects(Bucket=BUCKET, Delete={"Objects": batch})
        return len(batch)

    deleted = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for n in pool.map(_delete_batch, key_batches):
            deleted += n
            print(f" {deleted} deleted", end="\r")
    print(f" {deleted} objects deleted" + " " * 10)
    return deleted
def r2_recompress(slug, quality=65):
    """Download all webp images for a manga, re-encode at lower quality, re-upload."""
    base = f"manga/{slug}/"
    webp_keys = [
        obj["Key"]
        for page in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=base)
        for obj in page.get("Contents", [])
        if obj["Key"].endswith(".webp")
    ]
    if not webp_keys:
        print(f" No webp files for {slug}")
        return
    print(f" {len(webp_keys)} files to recompress (quality={quality})")

    def _shrink(key):
        # Returns bytes saved (>=0); -1 signals a failure for this key.
        try:
            body = s3.get_object(Bucket=BUCKET, Key=key)["Body"].read()
            recoded = _to_webp_bytes(Image.open(io.BytesIO(body)), quality=quality)
            gain = len(body) - len(recoded)
            if gain > 0:
                # Only overwrite when the re-encode is actually smaller.
                s3.put_object(Bucket=BUCKET, Key=key, Body=recoded, ContentType="image/webp")
                return gain
            return 0
        except Exception:
            return -1

    processed = 0
    failed = 0
    bytes_saved = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for gain in pool.map(_shrink, webp_keys):
            processed += 1
            if gain < 0:
                failed += 1
            else:
                bytes_saved += gain
            print(f" {processed}/{len(webp_keys)} — saved {bytes_saved // 1024} KB", end="\r")
    summary = f" Done: {processed}/{len(webp_keys)} processed, {bytes_saved // (1024 * 1024)} MB saved"
    if failed:
        summary += f" ({failed} failed)"
    print(summary + " " * 10)
# ── TUI ────────────────────────────────────────────────────
def tui_select(title, options, back=True, search=False):
    """Arrow-key menu. Returns selected index or -1."""
    entries = list(options)
    if back:
        entries.append("[Back]")
    choice = TerminalMenu(
        entries,
        title=title,
        search_key="/" if search else None,
        show_search_hint=search,
    ).show()
    # Escape/cancel, or picking the trailing "[Back]" entry, both map to -1.
    if choice is None:
        return -1
    if back and choice == len(entries) - 1:
        return -1
    return choice
# slug -> title cache shared by get_manga_title() / manga_display_name().
_title_cache = {}
def get_manga_title(slug):
    """Read manga title from detail.json or DB, fallback to slug.

    Lookup order: in-memory cache, local ``detail.json``, then the database
    (which batch-loads *all* titles into the cache so later lookups are free).
    Returns the slug itself when no title can be found.
    """
    if slug in _title_cache:
        return _title_cache[slug]
    # Try local detail.json first (cheap, no DB round-trip).
    detail_path = CONTENT_DIR / slug / "detail.json"
    if detail_path.exists():
        try:
            detail = json.loads(detail_path.read_text(encoding="utf-8"))
            title = detail.get("mg-title")
            if title:
                _title_cache[slug] = title
                return title
        except Exception:
            pass  # corrupt/partial detail.json — fall through to DB
    # Try database (batch load all titles). Close the connection even when a
    # cursor operation raises — the original leaked it on mid-query errors.
    conn = None
    try:
        conn = get_db()
        cur = conn.cursor()
        cur.execute('SELECT slug, title FROM "Manga"')
        for row in cur.fetchall():
            _title_cache[row[0]] = row[1]
        if slug in _title_cache:
            return _title_cache[slug]
    except Exception:
        pass  # DB unreachable — best-effort, fall back to slug
    finally:
        if conn is not None:
            conn.close()
    return slug
def manga_display_name(slug):
    """Format: 'title (slug)' or just 'slug'."""
    resolved = get_manga_title(slug)
    # When no title was found, get_manga_title returns the slug unchanged.
    return slug if resolved == slug else f"{resolved} ({slug})"
def tui_pick_manga_url(include_all=True):
    """Pick manga from manga.json. Shows title + slug."""
    urls = load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return None
    slugs = [slug_from_url(u) for u in urls]
    entries = ["All manga"] if include_all else []
    entries += [f"{n+1}. {manga_display_name(s)}" for n, s in enumerate(slugs)]
    choice = tui_select("Select manga (/ to search):", entries, search=True)
    if choice < 0:
        return None
    if not include_all:
        return urls[choice]
    # Entry 0 is the "All manga" sentinel; the rest map back to urls.
    return "__all__" if choice == 0 else urls[choice - 1]
def tui_pick_local(include_all=True):
    """Pick from local manga-content/. Shows title + slug."""
    names = list_local_manga()
    if not names:
        print(" No manga in manga-content/")
        return None
    entries = ["All manga"] if include_all else []
    entries += [f"{n+1}. {manga_display_name(name)}" for n, name in enumerate(names)]
    choice = tui_select("Select manga (/ to search):", entries, search=True)
    if choice < 0:
        return None
    if not include_all:
        return names[choice]
    # Entry 0 is the "All manga" sentinel; the rest map back to names.
    return "__all__" if choice == 0 else names[choice - 1]
def tui_pick_r2():
    """Pick manga from R2. Shows title + slug."""
    slugs = r2_list_prefixes()
    if not slugs:
        print(" R2 is empty")
        return None
    labels = [f"{n+1}. {manga_display_name(s)}" for n, s in enumerate(slugs)]
    choice = tui_select("Select manga (/ to search):", labels, search=True)
    if choice < 0:
        return None
    return slugs[choice]
def tui_pick_chapters(chapters, slug=None):
    """Multi-select chapter picker. Space to toggle, Enter to confirm.

    Existing chapters shown grayed out. Returns a set of selected 1-based
    indices, None for "all chapters", or the string "back" when cancelled.
    """
    # Check which chapters already exist locally (by directory-name match).
    existing = set()
    if slug:
        existing = get_existing_chapters(CONTENT_DIR / slug)
    # Count already-downloaded chapters (was a manual counter with an unused
    # enumerate index).
    existing_count = sum(
        1 for ch in chapters
        if any(ch["chapterName"] in name for name in existing)
    )
    idx = tui_select(f"{len(chapters)} chapters ({existing_count} downloaded)", [
        "All chapters (skip existing)",
        "Select chapters (space to toggle)",
    ])
    if idx == -1:
        return "back"
    if idx == 0:
        return None  # all
    items = []
    for i, ch in enumerate(chapters, 1):
        done = any(ch["chapterName"] in name for name in existing)
        label = f"{i}. {ch['chapterName']}"
        if done:
            # Gray out already-downloaded chapters (ANSI bright-black).
            label = f"\033[90m{label} [done]\033[0m"
        items.append(label)
    menu = TerminalMenu(
        items,
        title="Space=toggle, Enter=confirm, /=search:",
        multi_select=True,
        show_multi_select_hint=True,
        search_key="/",
        show_search_hint=True,
    )
    selected = menu.show()
    if selected is None:
        return "back"
    # A single selection may come back as a bare int; normalize to a tuple.
    if isinstance(selected, int):
        selected = (selected,)
    return {i + 1 for i in selected}  # 1-based
def tui_download():
    """Interactive download: pick a manga, then pick chapters to fetch."""
    picked = tui_pick_manga_url()
    if not picked:
        return
    if picked == "__all__":
        cmd_download()
        return
    slug = slug_from_url(picked)
    print(f"\n Fetching chapters for {slug}...")

    def get_chapters(session):
        browser_page = session.page
        try:
            # "commit" is enough — we only need the page context for the API call.
            browser_page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
        except Exception:
            pass
        if not wait_for_cloudflare(session):
            return None
        return fetch_chapters_via_api(session.page, slug)

    chapters = with_browser(get_chapters)
    if not chapters:
        print(" Could not get chapters")
        return
    selection = tui_pick_chapters(chapters, slug=slug)
    if selection == "back":
        return
    cmd_download(picked, chapter_set=selection)
def tui_upload():
    """Interactive upload: pick a local manga (or all) and push to R2."""
    choice = tui_pick_local()
    if not choice:
        return
    # "__all__" means cmd_upload with no target argument.
    args = () if choice == "__all__" else (choice,)
    cmd_upload(*args)
def tui_sync():
    """Interactive sync: pick a manga URL (or all) and sync site -> R2."""
    choice = tui_pick_manga_url()
    if not choice:
        return
    # "__all__" means cmd_sync with no target argument.
    args = () if choice == "__all__" else (choice,)
    cmd_sync(*args)
def tui_edit_manga():
    """Edit manga metadata (title, description, genre, status) in DB.

    Loads one record, lets the user edit fields in memory in a loop, and
    writes everything back with a single UPDATE on "Save & exit".
    """
    try:
        conn = get_db()
        cur = conn.cursor()
        cur.execute('SELECT slug, title FROM "Manga" ORDER BY title')
        rows = cur.fetchall()
    except Exception as e:
        print(f" DB error: {e}")
        return
    if not rows:
        print(" No manga in DB")
        conn.close()
        return
    items = [f"{i+1}. {title} ({slug})" for i, (slug, title) in enumerate(rows)]
    sel = tui_select("Select manga to edit (/ to search):", items, search=True)
    if sel < 0:
        conn.close()
        return
    slug, _ = rows[sel]
    cur.execute('SELECT id, title, description, genre, status, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
    row = cur.fetchone()
    if not row:
        print(" Not found")
        conn.close()
        return
    mid, title, description, genre, status, cover_url = row
    while True:
        # Re-print current (possibly locally edited, not yet saved) values.
        print(f"\n Editing: {slug}")
        print(f" title: {title}")
        print(f" description: {(description or '')[:80]}{'...' if description and len(description) > 80 else ''}")
        print(f" genre: {genre}")
        print(f" status: {status}")
        print(f" coverUrl: {cover_url}")
        idx = tui_select("Edit field", [
            "title", "description", "genre", "status", "coverUrl",
            "Save & exit", "Discard & exit",
        ])
        if idx == -1 or idx == 6:  # [Back] or "Discard & exit"
            print(" Discarded.")
            break
        if idx == 5:  # "Save & exit": persist all fields at once
            cur.execute(
                'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
                'status = %s, "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                (title, description, genre, status, cover_url, mid),
            )
            conn.commit()
            print(" Saved.")
            break
        if idx == 3:  # status: fixed value set, so pick from a menu
            opts = ["PUBLISHED", "DRAFT", "HIDDEN"]
            s_idx = tui_select("Status:", opts)
            if s_idx >= 0:
                status = opts[s_idx]
        else:
            # Free-text fields (title/description/genre/coverUrl):
            # show the current value and prompt; empty input keeps it.
            field_name = ["title", "description", "genre", "status", "coverUrl"][idx]
            current = {"title": title, "description": description or "", "genre": genre, "coverUrl": cover_url or ""}[field_name]
            print(f" Current: {current}")
            new_val = input(f" New {field_name} (empty=keep): ").strip()
            if new_val:
                if idx == 0: title = new_val
                elif idx == 1: description = new_val
                elif idx == 2: genre = new_val
                elif idx == 4: cover_url = new_val
    conn.close()
def tui_r2_manage():
    """R2/DB management submenu: status, edit, delete, wipe, recompress."""
    while True:
        idx = tui_select("R2 / DB Management", [
            "Status",
            "Edit manga info",
            "Delete specific manga",
            "Clear ALL (R2 + DB)",
            "Recompress manga (quality 65)",
        ])
        if idx == -1:
            break
        elif idx == 0:
            # Status: count R2 objects in a single full-bucket listing pass,
            # grouping per-manga by the "manga/<slug>/..." key layout.
            slug_counts = {}
            total = 0
            for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET):
                for obj in pg.get("Contents", []):
                    total += 1
                    parts = obj["Key"].split("/")
                    if len(parts) >= 2 and parts[0] == "manga":
                        slug_counts[parts[1]] = slug_counts.get(parts[1], 0) + 1
            print(f"\n R2: {total} objects, {len(slug_counts)} manga")
            for slug in sorted(slug_counts):
                print(f" {manga_display_name(slug)}: {slug_counts[slug]} objects")
            # Best-effort DB row counts; DB errors are shown, not fatal.
            try:
                conn = get_db()
                cur = conn.cursor()
                cur.execute('SELECT COUNT(*) FROM "Manga"')
                mc = cur.fetchone()[0]
                cur.execute('SELECT COUNT(*) FROM "Chapter"')
                cc = cur.fetchone()[0]
                cur.execute('SELECT COUNT(*) FROM "Page"')
                pc = cur.fetchone()[0]
                print(f" DB: {mc} manga, {cc} chapters, {pc} pages")
                conn.close()
            except Exception as e:
                print(f" DB: {e}")
            input("\n Press ENTER...")
        elif idx == 1:
            tui_edit_manga()
        elif idx == 2:
            # Delete one manga: R2 objects first, then DB rows
            # (Page -> Chapter -> Manga, child tables first).
            picked = tui_pick_r2()
            if not picked:
                continue
            confirm = input(f" Delete {picked} from R2 + DB? [y/N] ").strip().lower()
            if confirm == "y":
                r2_delete_prefix(f"manga/{picked}/")
                try:
                    conn = get_db()
                    cur = conn.cursor()
                    cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (picked,))
                    row = cur.fetchone()
                    if row:
                        mid = row[0]
                        cur.execute('DELETE FROM "Page" WHERE "chapterId" IN (SELECT id FROM "Chapter" WHERE "mangaId" = %s)', (mid,))
                        cur.execute('DELETE FROM "Chapter" WHERE "mangaId" = %s', (mid,))
                        cur.execute('DELETE FROM "Manga" WHERE id = %s', (mid,))
                        conn.commit()
                        print(f" Removed from R2 + DB")
                    conn.close()
                except Exception as e:
                    print(f" DB error: {e}")
        elif idx == 3:
            # Nuclear option: wipe every R2 object and truncate all tables.
            confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower()
            if confirm == "y":
                r2_delete_prefix("")
                try:
                    conn = get_db()
                    cur = conn.cursor()
                    # Child tables first to satisfy foreign keys.
                    for t in ['"Page"', '"Chapter"', '"Manga"']:
                        cur.execute(f"DELETE FROM {t}")
                    conn.commit()
                    conn.close()
                    print(" All cleared")
                except Exception as e:
                    print(f" DB error: {e}")
        elif idx == 4:
            # Recompress one or all manga in R2 (overwrites originals).
            slugs = r2_list_prefixes()
            if not slugs:
                print(" R2 is empty")
                continue
            items = ["All manga"] + [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]
            sel = tui_select("Recompress which? (quality=65, overwrites originals)", items, search=True)
            if sel < 0:
                continue
            targets = slugs if sel == 0 else [slugs[sel - 1]]
            confirm = input(f" Recompress {len(targets)} manga to quality 65? [y/N] ").strip().lower()
            if confirm != "y":
                continue
            for slug in targets:
                print(f"\n {manga_display_name(slug)}")
                r2_recompress(slug, quality=65)
def main():
    """Top-level TUI loop; tears down the shared browser session on exit."""
    handlers = {
        0: cmd_setup,
        1: tui_download,
        2: tui_upload,
        3: tui_sync,
        4: tui_r2_manage,
    }
    try:
        while True:
            choice = tui_select("Manga Toolkit", [
                "Setup (solve Cloudflare)",
                "Download",
                "Upload (local -> R2)",
                "Sync (site -> R2)",
                "R2 / DB management",
                "Quit",
            ], back=False)
            # None / -1 (cancel) and 5 ("Quit") all exit the loop.
            if choice is None or choice == -1 or choice == 5:
                break
            handlers[choice]()
    finally:
        # Always close the persistent browser session, even on Ctrl-C.
        close_session()
        print("Bye!")
# Script entry point: launch the interactive TUI.
if __name__ == "__main__":
    main()