yiekheng 9cb9b8c7fd Lazy config, cross-platform support, session recovery, doc accuracy
Code:
- Defer boto3 client and DATABASE_URL reads to first use via
  _ensure_config(). Missing .env now prints a friendly "Missing env
  vars" list and exits instead of KeyError on import.
- Auto-detect Chrome binary from CHROME_CANDIDATES (macOS/Linux/Windows
  paths). Friendly error listing tried paths if none found.
- Guard termios/tty imports; EscListener becomes a no-op on Windows.
- hide_chrome() is a no-op on non-macOS (osascript only works on Darwin).
- with_browser catches target-closed/disconnected errors, resets the
  session singleton, and retries once before raising.

Docs:
- Fix claim that page.goto is never used — manga listing uses
  page.goto, only reader pages use window.location.href.
- Correct AppleScript command (full tell-application form).
- Clarify "Check missing pages" flow — re-upload is inline; dim-only
  fix reads bytes from R2 without re-upload.
- Add CREATE TABLE statements for Manga/Chapter/Page so schema contract
  is explicit.
- Add "Where to change what" table mapping tasks to code locations.
- Document lazy config, cross-platform constraints, and anti-patterns
  (headless, thread parallelism).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-12 18:32:20 +08:00


"""
Manga toolkit — download from m.happymh.com, upload to Cloudflare R2.
Usage:
python manga.py
"""
import io
import json
import os
import platform
import re
import select
import sys
import time
import socket
import subprocess
import threading
IS_MACOS = platform.system() == "Darwin"
# POSIX-only TTY modules; EscListener is a no-op on Windows.
try:
import termios
import tty
_HAS_TERMIOS = True
except ImportError:
termios = None
tty = None
_HAS_TERMIOS = False
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from urllib.parse import urlparse
import boto3
import psycopg2
from PIL import Image
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright
from simple_term_menu import TerminalMenu
load_dotenv()
# ── Config ─────────────────────────────────────────────────
BASE_URL = "https://m.happymh.com"
ROOT_DIR = Path(__file__).parent
CONTENT_DIR = ROOT_DIR / "manga-content"
MANGA_JSON = ROOT_DIR / "manga.json"
BROWSER_DATA = ROOT_DIR / ".browser-data"
CDP_PORT = 9333
REQUEST_DELAY = 1.5
UPLOAD_WORKERS = 8
CHROME_CANDIDATES = [
"/Applications/Google Chrome.app/Contents/MacOS/Google Chrome", # macOS
"/usr/bin/google-chrome", # Linux
"/usr/bin/google-chrome-stable",
"/usr/bin/chromium",
"/usr/bin/chromium-browser",
r"C:\Program Files\Google\Chrome\Application\chrome.exe", # Windows
r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
]
def _find_chrome():
for p in CHROME_CANDIDATES:
if Path(p).exists():
return p
return None
CHROME_PATH = _find_chrome()
# R2/DB config loaded lazily so missing .env gives a friendly error, not KeyError on import.
_REQUIRED_ENV = ("R2_ACCOUNT_ID", "R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_BUCKET", "R2_PUBLIC_URL", "DATABASE_URL")
s3 = None
BUCKET = None
PUBLIC_URL = None
DATABASE_URL = None
_config_loaded = False
def _ensure_config():
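    """Load R2/DB config on first use; prints the missing env var names and exits
    instead of raising KeyError at import time."""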
global s3, BUCKET, PUBLIC_URL, DATABASE_URL, _config_loaded
if _config_loaded:
return
missing = [k for k in _REQUIRED_ENV if not os.environ.get(k)]
if missing:
print("Missing env vars (check .env):")
for k in missing:
print(f" {k}")
sys.exit(1)
s3 = boto3.client(
"s3",
endpoint_url=f"https://{os.environ['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com",
aws_access_key_id=os.environ["R2_ACCESS_KEY"],
aws_secret_access_key=os.environ["R2_SECRET_KEY"],
region_name="auto",
)
BUCKET = os.environ["R2_BUCKET"]
PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/")
DATABASE_URL = os.environ["DATABASE_URL"]
_config_loaded = True
# ── ESC listener ───────────────────────────────────────────
class EscListener:
"""Context manager: listens for ESC key in background, sets self.stop event."""
def __init__(self):
self.stop = threading.Event()
self._thread = None
self._old = None
self._fd = None
def __enter__(self):
if not _HAS_TERMIOS or not sys.stdin.isatty():
return self
self._fd = sys.stdin.fileno()
try:
self._old = termios.tcgetattr(self._fd)
tty.setcbreak(self._fd)
except Exception:
self._old = None
return self
self._thread = threading.Thread(target=self._listen, daemon=True)
self._thread.start()
return self
def _listen(self):
while not self.stop.is_set():
try:
r, _, _ = select.select([sys.stdin], [], [], 0.2)
if r and sys.stdin.read(1) == "\x1b":
self.stop.set()
print("\n ESC pressed — stopping after current item...")
return
except Exception:
return
def __exit__(self, *args):
self.stop.set()
if self._old is not None:
try:
termios.tcsetattr(self._fd, termios.TCSADRAIN, self._old)
except Exception:
pass
# ── Chrome management ──────────────────────────────────────
def hide_chrome():
"""Hide Chrome window (macOS only; no-op elsewhere)."""
if not IS_MACOS:
return
try:
subprocess.Popen(
["osascript", "-e",
'tell application "System Events" to set visible of process "Google Chrome" to false'],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
)
except Exception:
pass
def is_port_open(port):
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
return s.connect_ex(("localhost", port)) == 0
def launch_chrome(start_url=None):
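    """Start Chrome with remote debugging on CDP_PORT and a dedicated profile dir.
    Returns the Popen handle, or None if something is already listening on the port,
    no Chrome binary was found, or startup timed out."""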
if is_port_open(CDP_PORT):
return None
if not CHROME_PATH or not Path(CHROME_PATH).exists():
print(" Chrome not found. Install Google Chrome or Chromium.")
print(" Searched:")
for p in CHROME_CANDIDATES:
print(f" {p}")
return None
cmd = [
CHROME_PATH,
f"--remote-debugging-port={CDP_PORT}",
f"--user-data-dir={BROWSER_DATA}",
"--no-first-run",
"--no-default-browser-check",
"--window-position=0,0",
"--window-size=800,600",
"--no-focus-on-navigate",
]
if start_url:
cmd.append(start_url)
proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
for _ in range(30):
if is_port_open(CDP_PORT):
time.sleep(1)
hide_chrome()
return proc
time.sleep(0.5)
print(" Chrome failed to start")
return None
class BrowserSession:
"""Manages Chrome + CDP lifecycle."""
def __init__(self):
self.chrome_proc = None
self.playwright = None
self.browser = None
self.page = None
def start(self):
self.chrome_proc = launch_chrome()
self.playwright = sync_playwright().start()
self.browser = self.playwright.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
context = self.browser.contexts[0]
self.page = context.pages[0] if context.pages else context.new_page()
def close(self):
try:
self.browser.close()
except Exception:
pass
if self.chrome_proc:
self.chrome_proc.terminate()
if self.playwright:
self.playwright.stop()
_session_singleton = None
def get_session():
"""Get or lazy-start the global Chrome session."""
global _session_singleton
if _session_singleton is None:
_session_singleton = BrowserSession()
_session_singleton.start()
return _session_singleton
def close_session():
"""Close the global Chrome session (called on exit)."""
global _session_singleton
if _session_singleton is not None:
_session_singleton.close()
_session_singleton = None
def with_browser(func):
"""Run func(session) using the persistent Chrome session.
If the session crashed (target closed etc.), reset and retry once."""
session = get_session()
try:
return func(session)
except Exception as e:
msg = str(e).lower()
if "target" in msg or "browser" in msg or "closed" in msg or "disconnected" in msg:
print(" Browser session lost, restarting...")
close_session()
return func(get_session())
raise
# ── Cloudflare ─────────────────────────────────────────────
def _wait_for_cf_on_page(page, timeout=120):
"""Wait for CF to resolve on a specific page."""
for i in range(timeout):
try:
title = page.title()
except Exception:
time.sleep(1)
continue
if "Just a moment" in title or "challenge" in page.url:
time.sleep(1)
continue
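        # "嗨皮漫画" ("Happy Manga") is the site title once the challenge has cleared.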
if title and ("嗨皮漫画" in title or "happymh" in page.url):
return True
time.sleep(1)
return False
def wait_for_cloudflare(session, timeout=120):
"""Wait for CF to resolve. User solves in the visible browser window."""
page = session.page
for i in range(timeout):
try:
title = page.title()
except Exception:
time.sleep(1)
continue
if "Just a moment" in title or "challenge" in page.url:
if i == 0:
print(" CF challenge — solve in browser...")
elif i % 15 == 0:
print(f" Still waiting for CF... ({i}s)")
time.sleep(1)
continue
if title and ("嗨皮漫画" in title or "happymh" in page.url):
return True
time.sleep(1)
print(" CF timed out.")
return False
# ── Happymh: chapter fetching ─────────────────────────────
def fetch_chapters_via_api(page, slug):
result = page.evaluate("""
async (slug) => {
const all = [];
let total = 0;
for (let p = 1; p <= 30; p++) {
const url = `/v2.0/apis/manga/chapterByPage?code=${slug}&lang=cn&order=asc&page=${p}&_t=${Date.now()}`;
try {
const ctrl = new AbortController();
setTimeout(() => ctrl.abort(), 10000);
const r = await fetch(url, { signal: ctrl.signal });
if (!r.ok) { if (p === 1) return { error: r.status }; break; }
const json = await r.json();
if (!json.data) break;
total = json.data.total || total;
let items = null;
for (const val of Object.values(json.data)) {
if (Array.isArray(val) && val.length > 0) { items = val; break; }
}
if (!items || items.length === 0) break;
for (const ch of items) {
all.push({ id: String(ch.id || ''), chapterName: ch.chapterName || ch.name || '' });
}
if (total && all.length >= total) break;
} catch (e) {
if (p === 1) return { error: e.message };
break;
}
}
return { chapters: all, total };
}
""", slug)
if result and result.get("chapters") and len(result["chapters"]) > 0:
chapters = result["chapters"]
total = result.get("total", len(chapters))
print(f" API: {len(chapters)}/{total} chapters")
return chapters
if result and result.get("error"):
print(f" API error: {result['error']}")
return None
def fetch_chapters_from_dom(page):
try:
page.wait_for_selector("a[href*='/mangaread/']", timeout=15000)
page.wait_for_timeout(1000)
except Exception:
return None
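    # Buttons that expand the chapter list: 展开全部 / 查看全部 / 全部章节 / 展开更多 / 更多
    # ("expand all" / "view all" / "all chapters" / "expand more" / "more").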
for selector in ["text=展开全部", "text=查看全部", "text=全部章节", "text=展开更多", "text=更多"]:
try:
btn = page.query_selector(selector)
if btn and btn.is_visible():
btn.click()
page.wait_for_timeout(2000)
break
except Exception:
continue
try:
page.wait_for_selector(".MuiDrawer-paper", timeout=5000)
except Exception:
pass
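    # "点我改变排序" = "click me to change the sort order" (toggles chapter ordering in the drawer).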
try:
sort_btn = page.query_selector("text=点我改变排序")
if sort_btn and sort_btn.is_visible():
sort_btn.click()
page.wait_for_timeout(2000)
except Exception:
pass
total = page.evaluate("""
() => {
const spans = document.querySelectorAll('.MuiDrawer-paper span');
for (const s of spans) {
const m = s.textContent.match(/共(\\d+)个章节/);
if (m) return parseInt(m[1]);
}
return 0;
}
""")
for _ in range(50):
count = page.evaluate("document.querySelectorAll('.MuiDrawer-paper a[href*=\"/mangaread/\"]').length")
if total and count >= total:
break
clicked = page.evaluate("""
() => {
const walker = document.createTreeWalker(
document.querySelector('.MuiDrawer-paper') || document.body, NodeFilter.SHOW_TEXT
);
while (walker.nextNode()) {
if (walker.currentNode.textContent.includes('加载更多')) {
let el = walker.currentNode.parentElement;
while (el && el.tagName !== 'LI') el = el.parentElement;
if (el) { el.click(); return true; }
walker.currentNode.parentElement.click();
return true;
}
}
return false;
}
""")
if not clicked:
break
page.wait_for_timeout(1000)
chapters = page.evaluate("""
() => {
const container = document.querySelector('.MuiDrawer-paper') || document;
const links = container.querySelectorAll('a[href*="/mangaread/"]');
const chapters = [], seen = new Set();
links.forEach(a => {
const match = a.getAttribute('href').match(/\\/mangaread\\/[^/]+\\/(\\d+)/);
if (match && !seen.has(match[1])) {
seen.add(match[1]);
const name = a.textContent.trim();
if (name && name !== '开始阅读') chapters.push({ id: match[1], chapterName: name });
}
});
return chapters;
}
""")
try:
page.keyboard.press("Escape")
except Exception:
pass
return chapters if chapters else None
# ── Happymh: metadata & cover ─────────────────────────────
def fetch_metadata(page):
html_text = page.content()
metadata = {"mg-url": page.url}
m = re.search(r'<h2 class="mg-title">(.*?)</h2>', html_text)
if m:
metadata["mg-title"] = m.group(1).strip()
m = re.search(r'<p class="mg-sub-title">.*?<a[^>]*>(.*?)</a>', html_text, re.DOTALL)
if m:
metadata["mg-author"] = m.group(1).strip()
genre_matches = re.findall(r'<p class="mg-cate">.*?</p>', html_text, re.DOTALL)
if genre_matches:
metadata["mg-genres"] = re.findall(r'<a[^>]*>(.*?)</a>', genre_matches[0])
m = re.search(r'<div class="mg-desc">.*?<p[^>]*>(.*?)</p>', html_text, re.DOTALL)
if m:
metadata["mg-description"] = m.group(1).strip()
if not metadata.get("mg-description"):
m = re.search(r'<mip-showmore[^>]*>(.*?)</mip-showmore>', html_text, re.DOTALL)
if m:
desc = re.sub(r'<[^>]+>', '', m.group(1)).strip()
if desc:
metadata["mg-description"] = desc
cover_url = page.evaluate("""
() => {
const og = document.querySelector('meta[property="og:image"]');
if (og) return og.content;
for (const sel of ['img.mg-cover', 'img[src*="mcover"]']) {
const img = document.querySelector(sel);
if (img && img.src) return img.src;
}
return null;
}
""")
if cover_url:
metadata["mg-cover"] = cover_url
return metadata
# ── Happymh: image download ───────────────────────────────
def _try_get_chapter_images(page, slug, chapter_id):
"""Single attempt to get chapter images. Returns (images, api_status)."""
captured_images = []
api_info = {"found": False, "status": None, "error": None}
def on_response(response):
if "/apis/manga/reading" not in response.url:
return
# Only capture our chapter, skip prefetched ones
if f"cid={chapter_id}" not in response.url and f"cid%3D{chapter_id}" not in response.url:
return
# Ignore if we already captured images (prevent duplicate/prefetch)
if captured_images:
return
api_info["found"] = True
api_info["status"] = response.status
if response.status != 200:
api_info["error"] = f"status {response.status}"
return
try:
data = response.json()
# Verify chapter ID in response body
resp_cid = str(data.get("data", {}).get("id", ""))
if resp_cid and resp_cid != str(chapter_id):
return
scans = data.get("data", {}).get("scans", [])
if isinstance(scans, str):
scans = json.loads(scans)
for scan in scans:
if isinstance(scan, dict) and "url" in scan:
captured_images.append({
"url": scan["url"],
"no_referrer": scan.get("r", 0) != 0,
})
except Exception as e:
api_info["error"] = str(e)
page.on("response", on_response)
reader_url = f"{BASE_URL}/mangaread/{slug}/{chapter_id}"
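    # Reader pages are navigated via window.location.href rather than page.goto (manga listing pages use page.goto).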
try:
page.evaluate(f"window.location.href = '{reader_url}'")
except Exception:
pass
hide_chrome()
time.sleep(2)
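    # Stub out window.close so the reader page cannot close the tab from script.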
try:
page.evaluate("window.close = () => {}")
except Exception:
pass
if not _wait_for_cf_on_page(page, timeout=90):
try:
page.remove_listener("response", on_response)
except Exception:
pass
return [], api_info
deadline = time.time() + 20
while time.time() < deadline:
if captured_images:
break
try:
page.wait_for_timeout(500)
except Exception:
break
try:
page.remove_listener("response", on_response)
except Exception:
pass
if not api_info["found"]:
print(" API not intercepted")
elif api_info["error"]:
print(f" API: {api_info['error']}")
# Filter out next-chapter preview images by counting DOM containers
if captured_images:
try:
counts = page.evaluate("""
() => {
const all = document.querySelectorAll('[class*="imgContainer"]').length;
const next = document.querySelectorAll('[class*="imgNext"]').length;
return { all, next, current: all - next };
}
""")
if counts and counts.get("next", 0) > 0:
actual = counts["current"]
if 0 < actual < len(captured_images):
captured_images = captured_images[:actual]
except Exception:
pass
# DOM fallback
if not captured_images:
try:
page.wait_for_timeout(3000)
dom_images = page.evaluate("""
() => {
const imgs = document.querySelectorAll('img[src*="http"]');
const nextImgs = new Set(
Array.from(document.querySelectorAll('[class*="imgNext"] img'))
.map(img => img.src)
);
const urls = [], seen = new Set();
imgs.forEach(img => {
const src = img.src || '';
if (src && !seen.has(src) && !nextImgs.has(src)
&& !src.includes('/mcover/')
&& !src.includes('cloudflare') && !src.includes('.svg')) {
seen.add(src); urls.push(src);
}
});
return urls;
}
""")
if dom_images:
print(f" DOM: {len(dom_images)} images")
for u in dom_images:
captured_images.append({"url": u, "no_referrer": False})
except Exception as e:
print(f" DOM failed: {e}")
return captured_images, api_info
def get_chapter_images(page, slug, chapter_id):
"""Get chapter images using given page. On API 403, returns empty (caller should handle CF)."""
images, api_info = _try_get_chapter_images(page, slug, chapter_id)
return images, api_info
def fetch_all_pages(page, images, max_attempts=3):
"""Fetch all pages with retry using given page. Returns {page_num: bytes}."""
total = len(images)
page_bytes = {}
pending = list(enumerate(images, 1))
for attempt in range(1, max_attempts + 1):
if not pending:
break
if attempt > 1:
time.sleep(2)
next_pending = []
for pn, img in pending:
body = fetch_image_bytes(page, img)
if body:
page_bytes[pn] = body
else:
next_pending.append((pn, img))
time.sleep(0.1)
pending = next_pending
return page_bytes
def _fetch_via_page(page, url, ref_policy):
try:
with page.expect_response(lambda r: url.split("?")[0] in r.url, timeout=15000) as resp_info:
page.evaluate("([u, r]) => fetch(u, { referrerPolicy: r })", [url, ref_policy])
response = resp_info.value
if response.status == 200:
body = response.body()
if body and len(body) > 100:
return body
except Exception:
pass
return None
def fetch_image_bytes(page, img):
"""Fetch image via browser network stack using given page. Tries URL variants on failure."""
url = img["url"]
ref_policy = "no-referrer" if img.get("no_referrer") else "origin"
# Try original URL
body = _fetch_via_page(page, url, ref_policy)
if body:
return body
# Fallback: strip query string (e.g., ?q=50)
if "?" in url:
body = _fetch_via_page(page, url.split("?")[0], ref_policy)
if body:
return body
return None
def download_image(page, img, save_path):
"""Fetch image and save to disk."""
if save_path.exists():
return True
body = fetch_image_bytes(page, img)
if body:
save_path.parent.mkdir(parents=True, exist_ok=True)
save_path.write_bytes(body)
return True
return False
# ── R2 / Upload ────────────────────────────────────────────
WEBP_QUALITY = 75
def _to_webp_bytes(img, quality=WEBP_QUALITY, method=6):
buf = io.BytesIO()
img.save(buf, format="WEBP", quality=quality, method=method)
return buf.getvalue()
def convert_to_webp(source, quality=WEBP_QUALITY):
return _to_webp_bytes(Image.open(source), quality)
def probe_and_webp(source, quality=WEBP_QUALITY):
"""Open once; return (width, height, webp_bytes)."""
with Image.open(source) as img:
return img.width, img.height, _to_webp_bytes(img, quality)
def insert_pages(cur, chapter_id, page_urls):
"""page_urls: {page_num: (url, width, height)}. Inserts in page_num order."""
for pn in sorted(page_urls):
url, w, h = page_urls[pn]
cur.execute(
'INSERT INTO "Page" ("chapterId", number, "imageUrl", width, height) VALUES (%s, %s, %s, %s, %s)',
(chapter_id, pn, url, w, h),
)
def make_cover(source, width=400, height=560):
img = Image.open(source)
target_ratio = width / height
img_ratio = img.width / img.height
if img_ratio > target_ratio:
new_width = int(img.height * target_ratio)
left = (img.width - new_width) // 2
img = img.crop((left, 0, left + new_width, img.height))
else:
new_height = int(img.width / target_ratio)
img = img.crop((0, 0, img.width, new_height))
img = img.resize((width, height), Image.LANCZOS)
return _to_webp_bytes(img, quality=80)
def upload_to_r2(key, data, content_type="image/webp"):
_ensure_config()
s3.put_object(Bucket=BUCKET, Key=key, Body=data, ContentType=content_type)
return f"{PUBLIC_URL}/{key}"
def r2_key_exists(key):
_ensure_config()
try:
s3.head_object(Bucket=BUCKET, Key=key)
return True
except s3.exceptions.ClientError:
return False
def get_db():
_ensure_config()
conn = psycopg2.connect(DATABASE_URL)
conn.set_client_encoding("UTF8")
return conn
def parse_chapter_dir(dir_name):
m = re.match(r"^(\d+)\s+(.+)$", dir_name)
if m:
return int(m.group(1)), m.group(2)
return 0, dir_name
# ── Helpers ────────────────────────────────────────────────
def load_manga_urls():
if not MANGA_JSON.exists():
return []
data = json.loads(MANGA_JSON.read_text(encoding="utf-8"))
return data if isinstance(data, list) else []
def slug_from_url(url):
return urlparse(url).path.strip("/").split("/")[-1]
def get_existing_chapters(manga_dir):
existing = set()
if manga_dir.exists():
for entry in manga_dir.iterdir():
if entry.is_dir() and any(entry.glob("*.jpg")):
existing.add(entry.name)
return existing
def list_local_manga():
if not CONTENT_DIR.exists():
return []
return sorted(d.name for d in CONTENT_DIR.iterdir() if d.is_dir() and not d.name.startswith("."))
# ── Core: download manga ──────────────────────────────────
def load_manga_page(session, slug):
"""Navigate to manga page, pass CF, return (chapters, metadata, cover_bytes) or None."""
cover_responses = {}
def on_cover(response):
if "/mcover/" in response.url and response.status == 200:
try:
cover_responses[response.url] = response.body()
except Exception:
pass
page = session.page
page.on("response", on_cover)
print(" Loading manga page...")
try:
page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
except Exception:
pass
hide_chrome()
if not wait_for_cloudflare(session):
page = session.page
try:
page.remove_listener("response", on_cover)
except Exception:
pass
return None
page = session.page # may have changed after CF restart
print(" Fetching chapters...")
chapters = fetch_chapters_via_api(page, slug)
if not chapters:
print(" API failed, trying DOM...")
chapters = fetch_chapters_from_dom(page)
metadata = fetch_metadata(page)
# Wait for cover image to be present in DOM (up to 8s)
cover_url = None
for _ in range(16):
cover_url = page.evaluate("""
() => {
const sels = ['img.mip-fill-content[src*="mcover"]', 'img[src*="/mcover/"]'];
for (const s of sels) {
const img = document.querySelector(s);
if (img && img.src) return img.src;
}
return null;
}
""")
if cover_url:
break
page.wait_for_timeout(500)
# Give the response another moment to be captured
if cover_url and cover_url not in cover_responses:
page.wait_for_timeout(1500)
try:
page.remove_listener("response", on_cover)
except Exception:
pass
cover_body = None
if cover_url:
cover_body = cover_responses.get(cover_url)
if not cover_body:
for url, data in cover_responses.items():
if cover_url.split("?")[0] in url or url.split("?")[0] in cover_url:
cover_body = data
break
if not cover_body:
if cover_url:
print(f" Cover URL found but body not captured ({len(cover_responses)} responses)")
else:
print(f" No cover URL found in DOM")
return chapters, metadata, cover_body
def save_manga_local(slug, metadata, cover_body):
"""Save metadata and cover to local manga-content/."""
manga_dir = CONTENT_DIR / slug
manga_dir.mkdir(parents=True, exist_ok=True)
detail_path = manga_dir / "detail.json"
if metadata:
existing = {}
if detail_path.exists():
try:
existing = json.loads(detail_path.read_text(encoding="utf-8"))
except json.JSONDecodeError:
pass
existing.update(metadata)
detail_path.write_text(json.dumps(existing, ensure_ascii=False, indent=4), encoding="utf-8")
cover_path = manga_dir / "cover.jpg"
if not cover_path.exists() and cover_body and len(cover_body) > 100:
cover_path.write_bytes(cover_body)
print(f" Cover saved ({len(cover_body)} bytes)")
def download_chapter(session, slug, chapter_index, chapter, manga_dir):
"""Download a single chapter's images. Returns True if successful."""
ch_id = chapter["id"]
ch_name = chapter["chapterName"]
folder_name = f"{chapter_index} {ch_name}"
chapter_dir = manga_dir / folder_name
images, _ = get_chapter_images(session.page, slug, ch_id)
if not images:
print(f" No images")
return False
print(f" {len(images)} pages")
chapter_dir.mkdir(parents=True, exist_ok=True)
page_bytes = fetch_all_pages(session.page, images)
ok = 0
for pn, body in page_bytes.items():
save_path = chapter_dir / f"{pn}.jpg"
save_path.write_bytes(body)
ok += 1
print(f" {ok}/{len(images)} downloaded" + " " * 20)
if ok < len(images):
try:
chapter_dir.rmdir()
except Exception:
pass
return False
time.sleep(REQUEST_DELAY)
return True
# ── Core: upload manga ────────────────────────────────────
def upload_manga_to_r2(manga_name, conn):
"""Upload a local manga to R2 and create DB records."""
manga_path = CONTENT_DIR / manga_name
detail_path = manga_path / "detail.json"
if not detail_path.exists():
print(f" Skipping {manga_name}: no detail.json")
return
detail = json.loads(detail_path.read_text(encoding="utf-8"))
title = detail.get("mg-title", manga_name)
slug = manga_name
genres = detail.get("mg-genres", [])
description = detail.get("mg-description", "")
genre = ", ".join(genres) if genres else "Drama"
cur = conn.cursor()
# Cover
cover_file = manga_path / "cover.jpg"
cover_url = ""
cover_key = f"manga/{slug}/cover.webp"
if cover_file.exists():
if not r2_key_exists(cover_key):
cover_url = upload_to_r2(cover_key, make_cover(cover_file))
print(f" Cover uploaded")
else:
cover_url = f"{PUBLIC_URL}/{cover_key}"
# Manga record
cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
row = cur.fetchone()
if row:
manga_id, existing_cover = row
if cover_url and cover_url != existing_cover:
cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id))
conn.commit()
else:
cur.execute(
'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
"VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
(title, description, cover_url, slug, genre),
)
manga_id = cur.fetchone()[0]
conn.commit()
print(f" Created manga (id: {manga_id})")
# Chapters
chapter_dirs = sorted(
[d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")],
key=lambda d: parse_chapter_dir(d.name)[0],
)
for chapter_dir in chapter_dirs:
order_num, chapter_title = parse_chapter_dir(chapter_dir.name)
if order_num == 0:
continue
cur.execute('SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s', (manga_id, order_num))
if cur.fetchone():
print(f" [{order_num}] {chapter_title} — skip")
continue
page_files = sorted(
[f for f in chapter_dir.iterdir() if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")],
key=lambda f: int(re.search(r"(\d+)", f.stem).group(1)) if re.search(r"(\d+)", f.stem) else 0,
)
if not page_files:
continue
print(f" [{order_num}] {chapter_title} ({len(page_files)} pages)")
# Upload to R2 first
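        # Default args freeze the current slug/chapter number for the worker threads.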
def process_page(args, _slug=slug, _order=order_num):
j, pf = args
r2_key = f"manga/{_slug}/chapters/{_order}/{j}.webp"
if r2_key_exists(r2_key):
with Image.open(pf) as img:
return j, f"{PUBLIC_URL}/{r2_key}", img.width, img.height
w, h, webp = probe_and_webp(pf)
return j, upload_to_r2(r2_key, webp), w, h
page_urls = {}
done = 0
with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
futures = {pool.submit(process_page, (j, f)): j for j, f in enumerate(page_files, 1)}
for future in as_completed(futures):
j, url, w, h = future.result()
page_urls[j] = (url, w, h)
done += 1
print(f" {done}/{len(page_files)}", end="\r")
if not page_urls:
print(f" Upload failed, skip")
continue
# DB records only after R2 upload succeeds
cur.execute(
'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
(manga_id, order_num, chapter_title),
)
chapter_id = cur.fetchone()[0]
insert_pages(cur, chapter_id, page_urls)
conn.commit()
print(f" {len(page_files)} pages uploaded" + " " * 10)
# ── Commands ───────────────────────────────────────────────
def cmd_setup():
print("\n Chrome will open. Solve Cloudflare on:")
print(" 1. m.happymh.com")
print(" 2. Any manga page")
print(" 3. Any reader page\n")
session = get_session()
try:
session.page.goto(BASE_URL, wait_until="commit", timeout=60000)
except Exception:
pass
    # Bring Chrome to front for setup (osascript exists only on macOS)
    if IS_MACOS:
        try:
            subprocess.Popen(
                ["osascript", "-e", 'tell application "Google Chrome" to activate'],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
            )
        except Exception:
            pass
input(" Press ENTER when done... ")
cookies = session.browser.contexts[0].cookies()
cf = [c for c in cookies if c["name"] == "cf_clearance"]
print(f" cf_clearance: {'found' if cf else 'NOT found'}")
hide_chrome()
print()
def cmd_download(manga_url=None, chapter_set=None):
"""Download manga. chapter_set is a set of 1-based indices, or None for all."""
urls = [manga_url] if manga_url else load_manga_urls()
if not urls:
print(" No URLs in manga.json")
return
print(f"\n Downloading {len(urls)} manga(s)... (ESC to stop)\n")
def run(session):
with EscListener() as esc:
for url in urls:
if esc.stop.is_set():
break
slug = slug_from_url(url)
try:
result = load_manga_page(session, slug)
if not result:
continue
chapters, metadata, cover_body = result
if not chapters:
print(" No chapters found.")
continue
print(f" Found {len(chapters)} chapters")
save_manga_local(slug, metadata, cover_body)
existing = get_existing_chapters(CONTENT_DIR / slug)
for i, ch in enumerate(chapters, 1):
if esc.stop.is_set():
break
if chapter_set and i not in chapter_set:
continue
if any(ch["chapterName"] in name for name in existing):
print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip")
continue
print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})")
download_chapter(session, slug, i, ch, CONTENT_DIR / slug)
print(f"\n Done: {slug}")
except Exception as e:
print(f"\n Error: {url}: {e}")
import traceback
traceback.print_exc()
with_browser(run)
print("\nDownload complete!")
def cmd_upload(manga_name=None):
if manga_name:
names = [manga_name]
else:
names = list_local_manga()
if not names:
print(" No manga in manga-content/")
return
print(f"\n Uploading {len(names)} manga(s)... (ESC to stop)")
conn = get_db()
try:
with EscListener() as esc:
for name in names:
if esc.stop.is_set():
break
print(f"\n {'='*50}")
print(f" {name}")
print(f" {'='*50}")
upload_manga_to_r2(name, conn)
finally:
conn.close()
print("\nUpload complete!")
def cmd_sync(manga_url=None):
"""Sync: fetch latest chapters, stream directly to R2 (no local save)."""
urls = [manga_url] if manga_url else load_manga_urls()
if not urls:
print(" No URLs in manga.json")
return
conn = get_db()
def run(session):
with EscListener() as esc:
for url in urls:
if esc.stop.is_set():
break
slug = slug_from_url(url)
print(f"\n{'='*60}")
print(f"Syncing: {slug}")
print(f"{'='*60}")
# 1. Load manga page + get chapters
result = load_manga_page(session, slug)
if not result:
continue
chapters, metadata, cover_body = result
if not chapters:
print(" No chapters found.")
continue
print(f" {len(chapters)} chapters on site")
# 2. Ensure manga in DB
cur = conn.cursor()
title = metadata.get("mg-title", slug)
genres = metadata.get("mg-genres", [])
description = metadata.get("mg-description", "")
genre = ", ".join(genres) if genres else "Drama"
# Cover → R2 (from RAM)
cover_url = ""
cover_key = f"manga/{slug}/cover.webp"
if cover_body and len(cover_body) > 100:
if not r2_key_exists(cover_key):
cover_webp = make_cover(io.BytesIO(cover_body))
cover_url = upload_to_r2(cover_key, cover_webp)
print(f" Cover uploaded to R2")
else:
cover_url = f"{PUBLIC_URL}/{cover_key}"
cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (slug,))
row = cur.fetchone()
if row:
manga_id = row[0]
# Refresh metadata fields (cover only updated if we have a new one)
if cover_url:
cur.execute(
'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
'"coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
(title, description, genre, cover_url, manga_id),
)
else:
cur.execute(
'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
'"updatedAt" = NOW() WHERE id = %s',
(title, description, genre, manga_id),
)
conn.commit()
print(f" Updated metadata (genre: {genre})")
else:
cur.execute(
'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
"VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
(title, description, cover_url, slug, genre),
)
manga_id = cur.fetchone()[0]
conn.commit()
print(f" Created manga in DB (id: {manga_id})")
# 3. Find chapters missing from DB
cur.execute('SELECT number FROM "Chapter" WHERE "mangaId" = %s', (manga_id,))
existing_numbers = {row[0] for row in cur.fetchall()}
                # 4. Collect chapters to sync
todo = [(i, ch) for i, ch in enumerate(chapters, 1) if i not in existing_numbers]
if not todo:
print(" Already up to date!")
continue
print(f" {len(todo)} new chapters to sync")
completed = 0
skipped = 0
for i, ch in todo:
if esc.stop.is_set():
break
ch_name = ch["chapterName"]
print(f" [{i}/{len(chapters)}] {ch_name} (id={ch['id']})")
images, api_info = get_chapter_images(session.page, slug, ch["id"])
if not images and api_info.get("status") == 403:
print(f" CF blocked — run Setup and try again")
esc.stop.set()
break
if not images:
print(f" No images")
skipped += 1
continue
print(f" {len(images)} pages")
page_bytes = fetch_all_pages(session.page, images)
if len(page_bytes) < len(images):
missing = [pn for pn in range(1, len(images) + 1) if pn not in page_bytes]
print(f" Could not fetch pages: {missing}, skipping chapter")
skipped += 1
continue
def upload_one(args, _slug=slug, _i=i):
pn, raw = args
r2_key = f"manga/{_slug}/chapters/{_i}/{pn}.webp"
w, h, webp = probe_and_webp(io.BytesIO(raw))
return pn, upload_to_r2(r2_key, webp), w, h
page_urls = {}
done = 0
with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
for pn, r2_url, w, h in pool.map(upload_one, page_bytes.items()):
page_urls[pn] = (r2_url, w, h)
done += 1
print(f" R2: {done}/{len(page_bytes)}", end="\r")
if not page_urls:
skipped += 1
continue
cur.execute(
'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
(manga_id, i, ch_name),
)
chapter_id = cur.fetchone()[0]
insert_pages(cur, chapter_id, page_urls)
conn.commit()
completed += 1
print(f" {len(page_urls)} pages synced" + " " * 20)
time.sleep(REQUEST_DELAY)
print(f" Synced {completed}/{len(todo)} chapters ({skipped} skipped)")
try:
with_browser(run)
finally:
conn.close()
print("\nSync complete!")
def r2_list_prefixes():
"""List manga slugs in R2 by scanning top-level prefixes under manga/."""
_ensure_config()
slugs = set()
paginator = s3.get_paginator("list_objects_v2")
for pg in paginator.paginate(Bucket=BUCKET, Prefix="manga/", Delimiter="/"):
for prefix in pg.get("CommonPrefixes", []):
# "manga/slug/" -> "slug"
slug = prefix["Prefix"].split("/")[1]
if slug:
slugs.add(slug)
return sorted(slugs)
def r2_count_by_prefix(prefix):
"""Count objects under a prefix."""
_ensure_config()
total = 0
for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix):
total += len(pg.get("Contents", []))
return total
def r2_delete_prefix(prefix):
"""Delete all objects under a prefix."""
_ensure_config()
total = 0
batches = []
for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix):
objects = pg.get("Contents", [])
if objects:
batches.append([{"Key": obj["Key"]} for obj in objects])
def delete_batch(keys):
s3.delete_objects(Bucket=BUCKET, Delete={"Objects": keys})
return len(keys)
with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
for count in pool.map(delete_batch, batches):
total += count
print(f" {total} deleted", end="\r")
print(f" {total} objects deleted" + " " * 10)
return total
def r2_recompress(slug, quality=65):
"""Download all webp images for a manga, re-encode at lower quality, re-upload."""
_ensure_config()
prefix = f"manga/{slug}/"
keys = []
for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix):
for obj in pg.get("Contents", []):
if obj["Key"].endswith(".webp"):
keys.append(obj["Key"])
if not keys:
print(f" No webp files for {slug}")
return
print(f" {len(keys)} files to recompress (quality={quality})")
saved_total = 0
failed = 0
def recompress_one(key):
try:
original = s3.get_object(Bucket=BUCKET, Key=key)["Body"].read()
new_data = _to_webp_bytes(Image.open(io.BytesIO(original)), quality=quality)
saved = len(original) - len(new_data)
if saved > 0:
s3.put_object(Bucket=BUCKET, Key=key, Body=new_data, ContentType="image/webp")
return saved
return 0
except Exception:
return -1
done = 0
with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
for saved in pool.map(recompress_one, keys):
done += 1
if saved < 0:
failed += 1
else:
saved_total += saved
print(f" {done}/{len(keys)} — saved {saved_total // 1024} KB", end="\r")
msg = f" Done: {done}/{len(keys)} processed, {saved_total // (1024 * 1024)} MB saved"
if failed:
msg += f" ({failed} failed)"
print(msg + " " * 10)
# ── TUI ────────────────────────────────────────────────────
def tui_select(title, options, back=True, search=False):
"""Arrow-key menu. Returns selected index or -1."""
items = list(options)
if back:
items.append("[Back]")
menu = TerminalMenu(
items,
title=title,
search_key="/" if search else None,
show_search_hint=search,
)
idx = menu.show()
if idx is None or (back and idx == len(items) - 1):
return -1
return idx
_title_cache = {}
def get_manga_title(slug):
"""Read manga title from detail.json or DB, fallback to slug."""
if slug in _title_cache:
return _title_cache[slug]
# Try local detail.json first
detail_path = CONTENT_DIR / slug / "detail.json"
if detail_path.exists():
try:
detail = json.loads(detail_path.read_text(encoding="utf-8"))
title = detail.get("mg-title")
if title:
_title_cache[slug] = title
return title
except Exception:
pass
# Try database (batch load all titles)
try:
conn = get_db()
cur = conn.cursor()
cur.execute('SELECT slug, title FROM "Manga"')
for row in cur.fetchall():
_title_cache[row[0]] = row[1]
conn.close()
if slug in _title_cache:
return _title_cache[slug]
except Exception:
pass
return slug
def manga_display_name(slug):
"""Format: 'title (slug)' or just 'slug'."""
title = get_manga_title(slug)
if title != slug:
return f"{title} ({slug})"
return slug
def tui_pick_manga_url(include_all=True):
"""Pick manga from manga.json. Shows title + slug."""
urls = load_manga_urls()
if not urls:
print(" No URLs in manga.json")
return None
slugs = [slug_from_url(u) for u in urls]
items = []
if include_all:
items.append("All manga")
items += [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]
idx = tui_select("Select manga (/ to search):", items, search=True)
if idx < 0:
return None
if include_all:
if idx == 0:
return "__all__"
return urls[idx - 1]
return urls[idx]
def tui_pick_local(include_all=True):
"""Pick from local manga-content/. Shows title + slug."""
local = list_local_manga()
if not local:
print(" No manga in manga-content/")
return None
items = []
if include_all:
items.append("All manga")
items += [f"{i+1}. {manga_display_name(name)}" for i, name in enumerate(local)]
idx = tui_select("Select manga (/ to search):", items, search=True)
if idx < 0:
return None
if include_all:
if idx == 0:
return "__all__"
return local[idx - 1]
return local[idx]
def tui_pick_r2():
"""Pick manga from R2. Shows title + slug."""
slugs = r2_list_prefixes()
if not slugs:
print(" R2 is empty")
return None
items = [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]
idx = tui_select("Select manga (/ to search):", items, search=True)
return slugs[idx] if idx >= 0 else None
def tui_pick_chapters(chapters, slug=None):
"""Multi-select chapter picker. Space to toggle, Enter to confirm.
Existing chapters shown grayed out. Returns set of selected 1-based indices, or None for all."""
# Check which chapters already exist locally
existing = set()
if slug:
existing = get_existing_chapters(CONTENT_DIR / slug)
# Count existing
existing_count = 0
for i, ch in enumerate(chapters, 1):
if any(ch["chapterName"] in name for name in existing):
existing_count += 1
idx = tui_select(f"{len(chapters)} chapters ({existing_count} downloaded)", [
"All chapters (skip existing)",
"Select chapters (space to toggle)",
])
if idx == -1:
return "back"
if idx == 0:
return None # all
items = []
for i, ch in enumerate(chapters, 1):
done = any(ch["chapterName"] in name for name in existing)
label = f"{i}. {ch['chapterName']}"
if done:
label = f"\033[90m{label} [done]\033[0m"
items.append(label)
menu = TerminalMenu(
items,
title="Space=toggle, Enter=confirm, /=search:",
multi_select=True,
show_multi_select_hint=True,
search_key="/",
show_search_hint=True,
)
selected = menu.show()
if selected is None:
return "back"
if isinstance(selected, int):
selected = (selected,)
return {i + 1 for i in selected} # 1-based
def tui_download():
picked = tui_pick_manga_url()
if not picked:
return
if picked == "__all__":
cmd_download()
return
slug = slug_from_url(picked)
print(f"\n Fetching chapters for {slug}...")
def get_chapters(session):
page = session.page
try:
page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
except Exception:
pass
if not wait_for_cloudflare(session):
return None
return fetch_chapters_via_api(session.page, slug)
chapters = with_browser(get_chapters)
if not chapters:
print(" Could not get chapters")
return
result = tui_pick_chapters(chapters, slug=slug)
if result == "back":
return
cmd_download(picked, chapter_set=result)
def tui_upload():
picked = tui_pick_local()
if not picked:
return
if picked == "__all__":
cmd_upload()
else:
cmd_upload(picked)
def tui_sync():
picked = tui_pick_manga_url()
if not picked:
return
if picked == "__all__":
cmd_sync()
else:
cmd_sync(picked)
def tui_edit_manga():
"""Edit manga metadata (title, description, genre, status) in DB."""
try:
conn = get_db()
cur = conn.cursor()
cur.execute('SELECT slug, title FROM "Manga" ORDER BY title')
rows = cur.fetchall()
except Exception as e:
print(f" DB error: {e}")
return
if not rows:
print(" No manga in DB")
conn.close()
return
items = [f"{i+1}. {title} ({slug})" for i, (slug, title) in enumerate(rows)]
sel = tui_select("Select manga to edit (/ to search):", items, search=True)
if sel < 0:
conn.close()
return
slug, _ = rows[sel]
cur.execute('SELECT id, title, description, genre, status, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
row = cur.fetchone()
if not row:
print(" Not found")
conn.close()
return
mid, title, description, genre, status, cover_url = row
while True:
print(f"\n Editing: {slug}")
print(f" title: {title}")
print(f" description: {(description or '')[:80]}{'...' if description and len(description) > 80 else ''}")
print(f" genre: {genre}")
print(f" status: {status}")
print(f" coverUrl: {cover_url}")
idx = tui_select("Edit field", [
"title", "description", "genre", "status", "coverUrl",
"Save & exit", "Discard & exit",
])
if idx == -1 or idx == 6:
print(" Discarded.")
break
if idx == 5:
cur.execute(
'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
'status = %s, "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
(title, description, genre, status, cover_url, mid),
)
conn.commit()
print(" Saved.")
break
if idx == 3: # status
opts = ["PUBLISHED", "DRAFT", "HIDDEN"]
s_idx = tui_select("Status:", opts)
if s_idx >= 0:
status = opts[s_idx]
else:
field_name = ["title", "description", "genre", "status", "coverUrl"][idx]
current = {"title": title, "description": description or "", "genre": genre, "coverUrl": cover_url or ""}[field_name]
print(f" Current: {current}")
new_val = input(f" New {field_name} (empty=keep): ").strip()
if new_val:
if idx == 0: title = new_val
elif idx == 1: description = new_val
elif idx == 2: genre = new_val
elif idx == 4: cover_url = new_val
conn.close()
def _pick_manga_and_chapters(conn, prompt="Select chapters", multi=True):
"""Helper: pick manga from DB, then pick chapter(s). Returns (slug, [(ch_id, ch_num, ch_title), ...]) or None."""
cur = conn.cursor()
cur.execute('SELECT id, slug, title FROM "Manga" ORDER BY title')
mangas = cur.fetchall()
if not mangas:
print(" No manga in DB")
return None
items = [f"{i+1}. {title} ({slug})" for i, (_, slug, title) in enumerate(mangas)]
sel = tui_select("Select manga (/ to search):", items, search=True)
if sel < 0:
return None
manga_id, slug, _ = mangas[sel]
cur.execute('SELECT id, number, title FROM "Chapter" WHERE "mangaId" = %s ORDER BY number', (manga_id,))
chapters = cur.fetchall()
if not chapters:
print(" No chapters in DB for this manga")
return None
if multi:
scope = tui_select(f"{prompt}: {len(chapters)} chapters", [
"All chapters",
"Select specific chapters",
])
if scope == -1:
return None
if scope == 0:
return slug, list(chapters)
items = [f"{num}. {title}" for _, num, title in chapters]
menu = TerminalMenu(
items,
title="Space=toggle, Enter=confirm, /=search:",
multi_select=True,
show_multi_select_hint=True,
search_key="/",
show_search_hint=True,
)
selected = menu.show()
if not selected:
return None
if isinstance(selected, int):
selected = (selected,)
picked = [chapters[i] for i in selected]
else:
items = [f"{num}. {title}" for _, num, title in chapters]
sel = tui_select(f"{prompt} (/ to search):", items, search=True)
if sel < 0:
return None
picked = [chapters[sel]]
return slug, picked
def tui_delete_chapter():
"""Delete specific chapter(s) from R2 + DB."""
try:
conn = get_db()
except Exception as e:
print(f" DB error: {e}")
return
try:
result = _pick_manga_and_chapters(conn, "Select chapters to delete")
if not result:
return
slug, to_delete = result
confirm = input(f" Delete {len(to_delete)} chapter(s) from R2 + DB? [y/N] ").strip().lower()
if confirm != "y":
print(" Cancelled.")
return
cur = conn.cursor()
for ch_id, ch_num, ch_title in to_delete:
print(f" Deleting [{ch_num}] {ch_title}...")
r2_delete_prefix(f"manga/{slug}/chapters/{ch_num}/")
cur.execute('DELETE FROM "Page" WHERE "chapterId" = %s', (ch_id,))
cur.execute('DELETE FROM "Chapter" WHERE id = %s', (ch_id,))
conn.commit()
print(f" Done.")
finally:
conn.close()
def tui_check_missing_pages():
"""Check selected chapters against the site's actual page count and re-upload if mismatched."""
try:
conn = get_db()
except Exception as e:
print(f" DB error: {e}")
return
try:
        result = _pick_manga_and_chapters(conn, "Select chapters to check")
        if not result:
            conn.close()
            return
        slug, selected_chapters = result
        if slug not in [slug_from_url(u) for u in load_manga_urls()]:
            print(f" {slug} not in manga.json — cannot re-fetch pages")
            conn.close()
            return
except Exception:
conn.close()
raise
# Load reader pages and compare site's actual page count vs R2
def run(session):
with EscListener() as esc:
result = load_manga_page(session, slug)
if not result:
return
chapters, _, _ = result
if not chapters:
return
cur2 = conn.cursor()
fixed_dims = 0
reuploaded = 0
print(f"\n Checking {len(selected_chapters)} chapters...")
for ch_id, ch_num, ch_title in selected_chapters:
if esc.stop.is_set():
break
if ch_num > len(chapters):
print(f" [{ch_num}] {ch_title}: out of range on site")
continue
ch = chapters[ch_num - 1]
images, api_info = get_chapter_images(session.page, slug, ch["id"])
if not images:
if api_info.get("status") == 403:
print(f" [{ch_num}] CF blocked — run Setup")
esc.stop.set()
break
print(f" [{ch_num}] {ch_title}: no images from site")
continue
site_count = len(images)
r2_count = r2_count_by_prefix(f"manga/{slug}/chapters/{ch_num}/")
if site_count != r2_count:
print(f" [{ch_num}] {ch_title}: site={site_count}, R2={r2_count} — re-uploading...")
# Re-upload IMMEDIATELY while browser is on this chapter's reader page
page_bytes = fetch_all_pages(session.page, images)
if len(page_bytes) < len(images):
missing = [pn for pn in range(1, len(images) + 1) if pn not in page_bytes]
print(f" Could not fetch pages: {missing}")
for mn in missing:
print(f" page {mn}: {images[mn-1]['url']}")
print(f" Skipping chapter")
continue
def upload_page(args, _slug=slug, _n=ch_num):
pn, raw = args
r2_key = f"manga/{_slug}/chapters/{_n}/{pn}.webp"
with Image.open(io.BytesIO(raw)) as img:
w, h = img.width, img.height
return pn, upload_to_r2(r2_key, convert_to_webp(io.BytesIO(raw))), w, h
page_urls = {}
done = 0
with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
for pn, r2_url, w, h in pool.map(upload_page, page_bytes.items()):
page_urls[pn] = (r2_url, w, h)
done += 1
print(f" R2: {done}/{len(page_bytes)}", end="\r")
cur2.execute('DELETE FROM "Page" WHERE "chapterId" = %s', (ch_id,))
for pn in sorted(page_urls):
url, w, h = page_urls[pn]
cur2.execute(
'INSERT INTO "Page" ("chapterId", number, "imageUrl", width, height) VALUES (%s, %s, %s, %s, %s)',
(ch_id, pn, url, w, h),
)
conn.commit()
reuploaded += 1
print(f" {len(page_urls)} pages restored" + " " * 20)
continue
# Count matches — check if DB has valid width/height for all pages
cur2.execute(
'SELECT COUNT(*), '
'COUNT(*) FILTER (WHERE width IS NULL OR width <= 0), '
'COUNT(*) FILTER (WHERE height IS NULL OR height <= 0), '
'MIN(width), MAX(width), MIN(height), MAX(height) '
'FROM "Page" WHERE "chapterId" = %s',
(ch_id,),
)
db_count, bad_w, bad_h, min_w, max_w, min_h, max_h = cur2.fetchone()
bad_count = max(bad_w, bad_h)
if bad_count > 0:
print(f" [{ch_num}] {ch_title}: {bad_count} pages need dims — fixing from R2...")
cur2.execute(
'SELECT id, number FROM "Page" WHERE "chapterId" = %s '
'AND (width IS NULL OR width = 0 OR height IS NULL OR height = 0) '
'ORDER BY number',
(ch_id,),
)
pages = cur2.fetchall()
def read_dims(args, _slug=slug, _n=ch_num):
page_id, pn = args
r2_key = f"manga/{_slug}/chapters/{_n}/{pn}.webp"
try:
data = s3.get_object(Bucket=BUCKET, Key=r2_key)["Body"].read()
with Image.open(io.BytesIO(data)) as img:
return page_id, img.width, img.height
except Exception:
return page_id, None, None
updated = 0
with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
for page_id, w, h in pool.map(read_dims, pages):
if w and h:
cur2.execute(
'UPDATE "Page" SET width = %s, height = %s WHERE id = %s',
(w, h, page_id),
)
updated += 1
conn.commit()
fixed_dims += 1
print(f" {updated}/{len(pages)} dims updated")
else:
print(f" [{ch_num}] {ch_title}: {site_count} pages OK (w {min_w}-{max_w}, h {min_h}-{max_h})")
print(f"\n Done: {reuploaded} re-uploaded, {fixed_dims} dim-fixed")
try:
with_browser(run)
finally:
conn.close()
print("\nCheck complete!")
def tui_r2_manage():
while True:
idx = tui_select("R2 / DB Management", [
"Status",
"Edit manga info",
"Delete specific manga",
"Delete specific chapter",
"Check missing pages",
"Clear ALL (R2 + DB)",
"Recompress manga (quality 65)",
])
if idx == -1:
break
elif idx == 0:
_ensure_config()
slug_counts = {}
total = 0
for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET):
for obj in pg.get("Contents", []):
total += 1
parts = obj["Key"].split("/")
if len(parts) >= 2 and parts[0] == "manga":
slug_counts[parts[1]] = slug_counts.get(parts[1], 0) + 1
print(f"\n R2: {total} objects, {len(slug_counts)} manga")
for slug in sorted(slug_counts):
print(f" {manga_display_name(slug)}: {slug_counts[slug]} objects")
try:
conn = get_db()
cur = conn.cursor()
cur.execute('SELECT COUNT(*) FROM "Manga"')
mc = cur.fetchone()[0]
cur.execute('SELECT COUNT(*) FROM "Chapter"')
cc = cur.fetchone()[0]
cur.execute('SELECT COUNT(*) FROM "Page"')
pc = cur.fetchone()[0]
print(f" DB: {mc} manga, {cc} chapters, {pc} pages")
conn.close()
except Exception as e:
print(f" DB: {e}")
input("\n Press ENTER...")
elif idx == 1:
tui_edit_manga()
elif idx == 2:
picked = tui_pick_r2()
if not picked:
continue
confirm = input(f" Delete {picked} from R2 + DB? [y/N] ").strip().lower()
if confirm == "y":
r2_delete_prefix(f"manga/{picked}/")
try:
conn = get_db()
cur = conn.cursor()
cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (picked,))
row = cur.fetchone()
if row:
mid = row[0]
cur.execute('DELETE FROM "Page" WHERE "chapterId" IN (SELECT id FROM "Chapter" WHERE "mangaId" = %s)', (mid,))
cur.execute('DELETE FROM "Chapter" WHERE "mangaId" = %s', (mid,))
cur.execute('DELETE FROM "Manga" WHERE id = %s', (mid,))
conn.commit()
print(f" Removed from R2 + DB")
conn.close()
except Exception as e:
print(f" DB error: {e}")
elif idx == 3:
tui_delete_chapter()
elif idx == 4:
tui_check_missing_pages()
elif idx == 5:
confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower()
if confirm == "y":
r2_delete_prefix("")
try:
conn = get_db()
cur = conn.cursor()
for t in ['"Page"', '"Chapter"', '"Manga"']:
cur.execute(f"DELETE FROM {t}")
conn.commit()
conn.close()
print(" All cleared")
except Exception as e:
print(f" DB error: {e}")
elif idx == 6:
slugs = r2_list_prefixes()
if not slugs:
print(" R2 is empty")
continue
items = ["All manga"] + [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]
sel = tui_select("Recompress which? (quality=65, overwrites originals)", items, search=True)
if sel < 0:
continue
targets = slugs if sel == 0 else [slugs[sel - 1]]
confirm = input(f" Recompress {len(targets)} manga to quality 65? [y/N] ").strip().lower()
if confirm != "y":
continue
for slug in targets:
print(f"\n {manga_display_name(slug)}")
r2_recompress(slug, quality=65)
def main():
try:
while True:
idx = tui_select("Manga Toolkit", [
"Setup (solve Cloudflare)",
"Download",
"Upload (local -> R2)",
"Sync (site -> R2)",
"R2 / DB management",
"Quit",
], back=False)
if idx is None or idx == -1 or idx == 5:
break
elif idx == 0:
cmd_setup()
elif idx == 1:
tui_download()
elif idx == 2:
tui_upload()
elif idx == 3:
tui_sync()
elif idx == 4:
tui_r2_manage()
finally:
close_session()
print("Bye!")
if __name__ == "__main__":
main()