Code: - Defer boto3 client and DATABASE_URL reads to first use via _ensure_config(). Missing .env now prints a friendly "Missing env vars" list and exits instead of KeyError on import. - Auto-detect Chrome binary from CHROME_CANDIDATES (macOS/Linux/Windows paths). Friendly error listing tried paths if none found. - Guard termios/tty imports; EscListener becomes a no-op on Windows. - hide_chrome() is a no-op on non-macOS (osascript only works on Darwin). - with_browser catches target-closed/disconnected errors, resets the session singleton, and retries once before raising. Docs: - Fix claim that page.goto is never used — manga listing uses page.goto, only reader pages use window.location.href. - Correct AppleScript command (full tell-application form). - Clarify "Check missing pages" flow — re-upload is inline; dim-only fix reads bytes from R2 without re-upload. - Add CREATE TABLE statements for Manga/Chapter/Page so schema contract is explicit. - Add "Where to change what" table mapping tasks to code locations. - Document lazy config, cross-platform constraints, and anti-patterns (headless, thread parallelism). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2063 lines
69 KiB
Python
2063 lines
69 KiB
Python
"""
|
|
Manga toolkit — download from m.happymh.com, upload to Cloudflare R2.
|
|
|
|
Usage:
|
|
python manga.py
|
|
"""
|
|
|
|
import io
|
|
import json
|
|
import os
|
|
import platform
|
|
import re
|
|
import select
|
|
import sys
|
|
import time
|
|
import socket
|
|
import subprocess
|
|
import threading
|
|
|
|
IS_MACOS = platform.system() == "Darwin"
|
|
|
|
# POSIX-only TTY modules; EscListener is a no-op on Windows.
|
|
try:
|
|
import termios
|
|
import tty
|
|
_HAS_TERMIOS = True
|
|
except ImportError:
|
|
termios = None
|
|
tty = None
|
|
_HAS_TERMIOS = False
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
|
|
import boto3
|
|
import psycopg2
|
|
from PIL import Image
|
|
from dotenv import load_dotenv
|
|
from playwright.sync_api import sync_playwright
|
|
from simple_term_menu import TerminalMenu
|
|
|
|
load_dotenv()
|
|
|
|
# ── Config ─────────────────────────────────────────────────
|
|
|
|
BASE_URL = "https://m.happymh.com"          # scrape target (mobile site)
ROOT_DIR = Path(__file__).parent
CONTENT_DIR = ROOT_DIR / "manga-content"    # downloaded chapters/covers live here
MANGA_JSON = ROOT_DIR / "manga.json"        # JSON list of manga URLs to download
BROWSER_DATA = ROOT_DIR / ".browser-data"   # persistent Chrome profile (keeps CF cookies)
CDP_PORT = 9333                             # Chrome DevTools Protocol debug port
REQUEST_DELAY = 1.5                         # seconds to pause between chapters
UPLOAD_WORKERS = 8                          # thread-pool size for R2 uploads

# Known install locations, probed in order by _find_chrome(); first existing wins.
CHROME_CANDIDATES = [
    "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome",  # macOS
    "/usr/bin/google-chrome",  # Linux
    "/usr/bin/google-chrome-stable",
    "/usr/bin/chromium",
    "/usr/bin/chromium-browser",
    r"C:\Program Files\Google\Chrome\Application\chrome.exe",  # Windows
    r"C:\Program Files (x86)\Google\Chrome\Application\chrome.exe",
]
|
|
|
|
|
|
def _find_chrome():
    """Return the first existing Chrome/Chromium binary from CHROME_CANDIDATES, or None."""
    return next(
        (candidate for candidate in CHROME_CANDIDATES if Path(candidate).exists()),
        None,
    )
|
|
|
|
|
|
CHROME_PATH = _find_chrome()  # resolved once at import; None if no Chrome/Chromium was found
|
|
|
|
|
|
# R2/DB config loaded lazily so missing .env gives a friendly error, not KeyError on import.
_REQUIRED_ENV = ("R2_ACCOUNT_ID", "R2_ACCESS_KEY", "R2_SECRET_KEY", "R2_BUCKET", "R2_PUBLIC_URL", "DATABASE_URL")
# All four are populated by _ensure_config() on first use; None until then.
s3 = None            # boto3 S3 client bound to the R2 endpoint
BUCKET = None        # R2 bucket name
PUBLIC_URL = None    # public base URL for uploaded objects (trailing slash stripped)
DATABASE_URL = None  # Postgres connection string
_config_loaded = False  # one-shot guard so _ensure_config() runs its body only once
|
|
|
|
|
|
def _ensure_config():
    """Validate required env vars and build the R2 client / DB settings once.

    Prints the list of missing variables and exits (instead of raising
    KeyError) when .env is incomplete. Idempotent: real work happens only
    on the first call, guarded by _config_loaded.
    """
    global s3, BUCKET, PUBLIC_URL, DATABASE_URL, _config_loaded
    if _config_loaded:
        return

    absent = [name for name in _REQUIRED_ENV if not os.environ.get(name)]
    if absent:
        print("Missing env vars (check .env):")
        for name in absent:
            print(f"  {name}")
        sys.exit(1)

    env = os.environ
    s3 = boto3.client(
        "s3",
        endpoint_url=f"https://{env['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com",
        aws_access_key_id=env["R2_ACCESS_KEY"],
        aws_secret_access_key=env["R2_SECRET_KEY"],
        region_name="auto",
    )
    BUCKET = env["R2_BUCKET"]
    PUBLIC_URL = env["R2_PUBLIC_URL"].rstrip("/")
    DATABASE_URL = env["DATABASE_URL"]
    _config_loaded = True
|
|
|
|
|
|
# ── ESC listener ───────────────────────────────────────────
|
|
|
|
|
|
class EscListener:
    """Context manager: listens for ESC key in background, sets self.stop event."""

    def __init__(self):
        self.stop = threading.Event()  # set when ESC is pressed or on __exit__
        self._thread = None  # background stdin-polling thread (POSIX TTY only)
        self._old = None     # saved termios attributes, restored on __exit__
        self._fd = None      # stdin file descriptor

    def __enter__(self):
        # Degrade to a no-op listener on Windows (no termios) or when stdin
        # is not an interactive terminal (piped/redirected input).
        if not _HAS_TERMIOS or not sys.stdin.isatty():
            return self
        self._fd = sys.stdin.fileno()
        try:
            self._old = termios.tcgetattr(self._fd)
            tty.setcbreak(self._fd)  # single-key reads without Enter
        except Exception:
            # Terminal mode switch failed — stay a no-op, nothing to restore.
            self._old = None
            return self
        self._thread = threading.Thread(target=self._listen, daemon=True)
        self._thread.start()
        return self

    def _listen(self):
        # Poll stdin with a short timeout so the thread notices self.stop quickly.
        while not self.stop.is_set():
            try:
                r, _, _ = select.select([sys.stdin], [], [], 0.2)
                if r and sys.stdin.read(1) == "\x1b":  # ESC byte
                    self.stop.set()
                    print("\n ESC pressed — stopping after current item...")
                    return
            except Exception:
                return

    def __exit__(self, *args):
        self.stop.set()
        # Restore the original terminal mode only if we actually changed it.
        if self._old is not None:
            try:
                termios.tcsetattr(self._fd, termios.TCSADRAIN, self._old)
            except Exception:
                pass
|
|
|
|
|
|
# ── Chrome management ──────────────────────────────────────
|
|
|
|
|
|
def hide_chrome():
    """Hide Chrome window (macOS only; no-op elsewhere)."""
    if not IS_MACOS:
        return
    script = 'tell application "System Events" to set visible of process "Google Chrome" to false'
    try:
        subprocess.Popen(
            ["osascript", "-e", script],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        pass  # best-effort cosmetic action; never fatal
|
|
|
|
|
|
def is_port_open(port):
    """Return True if something is accepting TCP connections on localhost:port.

    A short timeout is set so the probe cannot hang indefinitely (the
    original used the default blocking socket, which has no timeout).
    """
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        s.settimeout(1.0)  # localhost connects are near-instant; fail fast otherwise
        return s.connect_ex(("localhost", port)) == 0
|
|
|
|
|
|
def launch_chrome(start_url=None):
    """Start Chrome with remote debugging enabled and wait for CDP to come up.

    Returns the Popen handle on success. Returns None when:
    - a Chrome is already listening on CDP_PORT (caller should just attach),
    - no Chrome binary was found (prints the searched paths), or
    - the CDP port never opened within ~15s.
    """
    if is_port_open(CDP_PORT):
        return None  # reuse the already-running instance
    if not CHROME_PATH or not Path(CHROME_PATH).exists():
        print(" Chrome not found. Install Google Chrome or Chromium.")
        print(" Searched:")
        for p in CHROME_CANDIDATES:
            print(f"  {p}")
        return None
    cmd = [
        CHROME_PATH,
        f"--remote-debugging-port={CDP_PORT}",
        f"--user-data-dir={BROWSER_DATA}",  # persistent profile keeps CF cookies
        "--no-first-run",
        "--no-default-browser-check",
        "--window-position=0,0",
        "--window-size=800,600",
        "--no-focus-on-navigate",
    ]
    if start_url:
        cmd.append(start_url)
    proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    # Poll for the CDP port: 30 tries x 0.5s = ~15s budget.
    for _ in range(30):
        if is_port_open(CDP_PORT):
            time.sleep(1)  # give Chrome a moment to finish opening its window
            hide_chrome()  # then push it out of the way (macOS only)
            return proc
        time.sleep(0.5)
    print(" Chrome failed to start")
    return None
|
|
|
|
|
|
class BrowserSession:
    """Manages Chrome + CDP lifecycle."""

    def __init__(self):
        self.chrome_proc = None  # Popen handle when we launched Chrome ourselves, else None
        self.playwright = None   # sync_playwright driver instance
        self.browser = None      # Browser attached over CDP
        self.page = None         # working Page (first existing tab, or a fresh one)

    def start(self):
        """Launch (or reuse) Chrome, then attach Playwright over CDP."""
        self.chrome_proc = launch_chrome()  # None if already running / launch failed
        self.playwright = sync_playwright().start()
        self.browser = self.playwright.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
        # CDP attach exposes the real browser's existing default context.
        context = self.browser.contexts[0]
        self.page = context.pages[0] if context.pages else context.new_page()

    def close(self):
        """Tear down in order: CDP connection, Chrome process, Playwright driver."""
        try:
            self.browser.close()
        except Exception:
            pass  # browser may already be gone (crashed or closed manually)
        if self.chrome_proc:
            self.chrome_proc.terminate()
        if self.playwright:
            self.playwright.stop()
|
|
|
|
|
|
_session_singleton = None
|
|
|
|
|
|
def get_session():
    """Get or lazy-start the global Chrome session."""
    global _session_singleton
    if _session_singleton is not None:
        return _session_singleton
    _session_singleton = BrowserSession()
    _session_singleton.start()
    return _session_singleton
|
|
|
|
|
|
def close_session():
    """Close the global Chrome session (called on exit)."""
    global _session_singleton
    if _session_singleton is None:
        return
    _session_singleton.close()
    _session_singleton = None
|
|
|
|
|
|
def with_browser(func):
    """Run func(session) using the persistent Chrome session.

    When the call dies because the browser itself went away (target closed,
    disconnected, ...), reset the singleton and retry exactly once; any
    other exception propagates unchanged.
    """
    try:
        return func(get_session())
    except Exception as exc:
        text = str(exc).lower()
        recoverable = any(
            token in text for token in ("target", "browser", "closed", "disconnected")
        )
        if not recoverable:
            raise
        print(" Browser session lost, restarting...")
        close_session()
        return func(get_session())
|
|
|
|
|
|
# ── Cloudflare ─────────────────────────────────────────────
|
|
|
|
|
|
def _wait_for_cf_on_page(page, timeout=120):
|
|
"""Wait for CF to resolve on a specific page."""
|
|
for i in range(timeout):
|
|
try:
|
|
title = page.title()
|
|
except Exception:
|
|
time.sleep(1)
|
|
continue
|
|
if "Just a moment" in title or "challenge" in page.url:
|
|
time.sleep(1)
|
|
continue
|
|
if title and ("嗨皮漫画" in title or "happymh" in page.url):
|
|
return True
|
|
time.sleep(1)
|
|
return False
|
|
|
|
|
|
def wait_for_cloudflare(session, timeout=120):
    """Wait for CF to resolve on session.page; the user solves it in the browser.

    Prints a hint when the challenge is first seen and a progress line every
    15s while it persists. True once the real site is detected, False on
    timeout.
    """
    page = session.page
    elapsed = 0
    while elapsed < timeout:
        try:
            title = page.title()
        except Exception:
            time.sleep(1)
            elapsed += 1
            continue
        if "Just a moment" in title or "challenge" in page.url:
            if elapsed == 0:
                print(" CF challenge — solve in browser...")
            elif elapsed % 15 == 0:
                print(f" Still waiting for CF... ({elapsed}s)")
            time.sleep(1)
            elapsed += 1
            continue
        if title and ("嗨皮漫画" in title or "happymh" in page.url):
            return True
        time.sleep(1)
        elapsed += 1
    print(" CF timed out.")
    return False
|
|
|
|
|
|
# ── Happymh: chapter fetching ─────────────────────────────
|
|
|
|
|
|
def fetch_chapters_via_api(page, slug):
    """Fetch the chapter list via the site's JSON API from inside the page.

    The fetch() runs in the page context so CF clearance cookies apply.
    Pages through /v2.0/apis/manga/chapterByPage (up to 30 pages) and
    returns a list of {"id", "chapterName"} dicts, or None when the API
    errored or produced no chapters (caller falls back to DOM scraping).
    """
    # JS returns {chapters, total} on success, or {error} if page 1 failed.
    result = page.evaluate("""
        async (slug) => {
            const all = [];
            let total = 0;
            for (let p = 1; p <= 30; p++) {
                const url = `/v2.0/apis/manga/chapterByPage?code=${slug}&lang=cn&order=asc&page=${p}&_t=${Date.now()}`;
                try {
                    const ctrl = new AbortController();
                    setTimeout(() => ctrl.abort(), 10000);
                    const r = await fetch(url, { signal: ctrl.signal });
                    if (!r.ok) { if (p === 1) return { error: r.status }; break; }
                    const json = await r.json();
                    if (!json.data) break;
                    total = json.data.total || total;
                    let items = null;
                    for (const val of Object.values(json.data)) {
                        if (Array.isArray(val) && val.length > 0) { items = val; break; }
                    }
                    if (!items || items.length === 0) break;
                    for (const ch of items) {
                        all.push({ id: String(ch.id || ''), chapterName: ch.chapterName || ch.name || '' });
                    }
                    if (total && all.length >= total) break;
                } catch (e) {
                    if (p === 1) return { error: e.message };
                    break;
                }
            }
            return { chapters: all, total };
        }
    """, slug)
    if result and result.get("chapters") and len(result["chapters"]) > 0:
        chapters = result["chapters"]
        total = result.get("total", len(chapters))
        print(f" API: {len(chapters)}/{total} chapters")
        return chapters
    if result and result.get("error"):
        print(f" API error: {result['error']}")
    return None
|
|
|
|
|
|
def fetch_chapters_from_dom(page):
    """Scrape the chapter list from the rendered page (fallback for the API path).

    Steps: wait for any reader link; click an "expand all"-style button;
    optionally flip sort order; repeatedly click "load more" inside the
    MUI drawer until the advertised chapter count is reached; then collect
    unique /mangaread/<slug>/<id> links. Returns [{"id", "chapterName"}]
    or None when nothing could be scraped.
    """
    try:
        page.wait_for_selector("a[href*='/mangaread/']", timeout=15000)
        page.wait_for_timeout(1000)
    except Exception:
        return None

    # Try each "expand the chapter list" button label; first visible one wins.
    for selector in ["text=展开全部", "text=查看全部", "text=全部章节", "text=展开更多", "text=更多"]:
        try:
            btn = page.query_selector(selector)
            if btn and btn.is_visible():
                btn.click()
                page.wait_for_timeout(2000)
                break
        except Exception:
            continue

    # The expanded list lives in a MUI drawer; wait briefly for it.
    try:
        page.wait_for_selector(".MuiDrawer-paper", timeout=5000)
    except Exception:
        pass

    # "点我改变排序" toggles chapter order (oldest-first wanted downstream).
    try:
        sort_btn = page.query_selector("text=点我改变排序")
        if sort_btn and sort_btn.is_visible():
            sort_btn.click()
            page.wait_for_timeout(2000)
    except Exception:
        pass

    # Total advertised as "共N个章节" somewhere in the drawer; 0 if absent.
    total = page.evaluate("""
        () => {
            const spans = document.querySelectorAll('.MuiDrawer-paper span');
            for (const s of spans) {
                const m = s.textContent.match(/共(\\d+)个章节/);
                if (m) return parseInt(m[1]);
            }
            return 0;
        }
    """)

    # Click "加载更多" (load more) until all chapters are present, max 50 rounds.
    for _ in range(50):
        count = page.evaluate("document.querySelectorAll('.MuiDrawer-paper a[href*=\"/mangaread/\"]').length")
        if total and count >= total:
            break
        clicked = page.evaluate("""
            () => {
                const walker = document.createTreeWalker(
                    document.querySelector('.MuiDrawer-paper') || document.body, NodeFilter.SHOW_TEXT
                );
                while (walker.nextNode()) {
                    if (walker.currentNode.textContent.includes('加载更多')) {
                        let el = walker.currentNode.parentElement;
                        while (el && el.tagName !== 'LI') el = el.parentElement;
                        if (el) { el.click(); return true; }
                        walker.currentNode.parentElement.click();
                        return true;
                    }
                }
                return false;
            }
        """)
        if not clicked:
            break
        page.wait_for_timeout(1000)

    # Collect unique chapter links; skip the "开始阅读" (start reading) CTA.
    chapters = page.evaluate("""
        () => {
            const container = document.querySelector('.MuiDrawer-paper') || document;
            const links = container.querySelectorAll('a[href*="/mangaread/"]');
            const chapters = [], seen = new Set();
            links.forEach(a => {
                const match = a.getAttribute('href').match(/\\/mangaread\\/[^/]+\\/(\\d+)/);
                if (match && !seen.has(match[1])) {
                    seen.add(match[1]);
                    const name = a.textContent.trim();
                    if (name && name !== '开始阅读') chapters.push({ id: match[1], chapterName: name });
                }
            });
            return chapters;
        }
    """)

    # Close the drawer so it doesn't cover later interactions.
    try:
        page.keyboard.press("Escape")
    except Exception:
        pass
    return chapters if chapters else None
|
|
|
|
|
|
# ── Happymh: metadata & cover ─────────────────────────────
|
|
|
|
|
|
def fetch_metadata(page):
    """Scrape title/author/genres/description/cover URL from the manga page.

    Parses page.content() with regexes (the mobile site is server-rendered
    HTML). Returns a dict keyed with the site's "mg-*" names; fields that
    can't be found are simply omitted. "mg-url" is always present.
    """
    html_text = page.content()
    metadata = {"mg-url": page.url}
    # Title: <h2 class="mg-title">
    m = re.search(r'<h2 class="mg-title">(.*?)</h2>', html_text)
    if m:
        metadata["mg-title"] = m.group(1).strip()
    # Author: first link inside the sub-title paragraph.
    m = re.search(r'<p class="mg-sub-title">.*?<a[^>]*>(.*?)</a>', html_text, re.DOTALL)
    if m:
        metadata["mg-author"] = m.group(1).strip()
    # Genres: all links inside the first mg-cate paragraph.
    genre_matches = re.findall(r'<p class="mg-cate">.*?</p>', html_text, re.DOTALL)
    if genre_matches:
        metadata["mg-genres"] = re.findall(r'<a[^>]*>(.*?)</a>', genre_matches[0])
    # Description: mg-desc paragraph first, else tag-stripped <mip-showmore>.
    m = re.search(r'<div class="mg-desc">.*?<p[^>]*>(.*?)</p>', html_text, re.DOTALL)
    if m:
        metadata["mg-description"] = m.group(1).strip()
    if not metadata.get("mg-description"):
        m = re.search(r'<mip-showmore[^>]*>(.*?)</mip-showmore>', html_text, re.DOTALL)
        if m:
            desc = re.sub(r'<[^>]+>', '', m.group(1)).strip()
            if desc:
                metadata["mg-description"] = desc
    # Cover URL: og:image meta preferred, else known cover <img> selectors.
    cover_url = page.evaluate("""
        () => {
            const og = document.querySelector('meta[property="og:image"]');
            if (og) return og.content;
            for (const sel of ['img.mg-cover', 'img[src*="mcover"]']) {
                const img = document.querySelector(sel);
                if (img && img.src) return img.src;
            }
            return null;
        }
    """)
    if cover_url:
        metadata["mg-cover"] = cover_url
    return metadata
|
|
|
|
|
|
# ── Happymh: image download ───────────────────────────────
|
|
|
|
|
|
def _try_get_chapter_images(page, slug, chapter_id):
    """Single attempt to get chapter images. Returns (images, api_status).

    Navigates the reader via window.location.href (not page.goto — in-page
    navigation keeps the CF session intact here) while intercepting the
    /apis/manga/reading JSON response for exactly this chapter. Each image
    is {"url": str, "no_referrer": bool}. If the API response is never
    captured, falls back to scraping <img> tags out of the DOM.
    """
    captured_images = []
    api_info = {"found": False, "status": None, "error": None}

    def on_response(response):
        if "/apis/manga/reading" not in response.url:
            return
        # Only capture our chapter, skip prefetched ones
        if f"cid={chapter_id}" not in response.url and f"cid%3D{chapter_id}" not in response.url:
            return
        # Ignore if we already captured images (prevent duplicate/prefetch)
        if captured_images:
            return
        api_info["found"] = True
        api_info["status"] = response.status
        if response.status != 200:
            api_info["error"] = f"status {response.status}"
            return
        try:
            data = response.json()
            # Verify chapter ID in response body
            resp_cid = str(data.get("data", {}).get("id", ""))
            if resp_cid and resp_cid != str(chapter_id):
                return
            scans = data.get("data", {}).get("scans", [])
            if isinstance(scans, str):
                scans = json.loads(scans)  # scans is sometimes double-encoded JSON
            for scan in scans:
                if isinstance(scan, dict) and "url" in scan:
                    captured_images.append({
                        "url": scan["url"],
                        # scan["r"] != 0 -> fetched with referrerPolicy "no-referrer"
                        # later (see fetch_image_bytes).
                        "no_referrer": scan.get("r", 0) != 0,
                    })
        except Exception as e:
            api_info["error"] = str(e)

    page.on("response", on_response)
    reader_url = f"{BASE_URL}/mangaread/{slug}/{chapter_id}"
    try:
        page.evaluate(f"window.location.href = '{reader_url}'")
    except Exception:
        pass  # navigation tears down the JS context; expected
    hide_chrome()

    time.sleep(2)
    try:
        # The reader page may call window.close(); neuter it to keep our tab.
        page.evaluate("window.close = () => {}")
    except Exception:
        pass

    if not _wait_for_cf_on_page(page, timeout=90):
        try:
            page.remove_listener("response", on_response)
        except Exception:
            pass
        return [], api_info

    # Give the response interception up to 20s to fire.
    deadline = time.time() + 20
    while time.time() < deadline:
        if captured_images:
            break
        try:
            page.wait_for_timeout(500)
        except Exception:
            break

    try:
        page.remove_listener("response", on_response)
    except Exception:
        pass

    if not api_info["found"]:
        print(" API not intercepted")
    elif api_info["error"]:
        print(f" API: {api_info['error']}")

    # Filter out next-chapter preview images by counting DOM containers
    if captured_images:
        try:
            counts = page.evaluate("""
                () => {
                    const all = document.querySelectorAll('[class*="imgContainer"]').length;
                    const next = document.querySelectorAll('[class*="imgNext"]').length;
                    return { all, next, current: all - next };
                }
            """)
            if counts and counts.get("next", 0) > 0:
                actual = counts["current"]
                if 0 < actual < len(captured_images):
                    captured_images = captured_images[:actual]
        except Exception:
            pass

    # DOM fallback: harvest <img> tags, excluding next-chapter previews,
    # cover thumbnails, CF assets and SVGs.
    if not captured_images:
        try:
            page.wait_for_timeout(3000)
            dom_images = page.evaluate("""
                () => {
                    const imgs = document.querySelectorAll('img[src*="http"]');
                    const nextImgs = new Set(
                        Array.from(document.querySelectorAll('[class*="imgNext"] img'))
                            .map(img => img.src)
                    );
                    const urls = [], seen = new Set();
                    imgs.forEach(img => {
                        const src = img.src || '';
                        if (src && !seen.has(src) && !nextImgs.has(src)
                            && !src.includes('/mcover/')
                            && !src.includes('cloudflare') && !src.includes('.svg')) {
                            seen.add(src); urls.push(src);
                        }
                    });
                    return urls;
                }
            """)
            if dom_images:
                print(f" DOM: {len(dom_images)} images")
                for u in dom_images:
                    captured_images.append({"url": u, "no_referrer": False})
        except Exception as e:
            print(f" DOM failed: {e}")

    return captured_images, api_info
|
|
|
|
|
|
def get_chapter_images(page, slug, chapter_id):
    """Get chapter images using given page. On API 403, returns empty (caller should handle CF)."""
    return _try_get_chapter_images(page, slug, chapter_id)
|
|
|
|
|
|
def fetch_all_pages(page, images, max_attempts=3):
    """Fetch every image with up to *max_attempts* retry rounds.

    Returns {page_num: bytes} (1-based page numbers). Pages that failed in
    one round are retried in the next after a 2s pause; a 0.1s delay is
    kept between individual fetches to stay gentle on the CDN.
    """
    page_bytes = {}
    remaining = list(enumerate(images, 1))

    for round_no in range(1, max_attempts + 1):
        if not remaining:
            break
        if round_no > 1:
            time.sleep(2)

        still_failing = []
        for number, image in remaining:
            data = fetch_image_bytes(page, image)
            if data:
                page_bytes[number] = data
            else:
                still_failing.append((number, image))
            time.sleep(0.1)
        remaining = still_failing

    return page_bytes
|
|
|
|
|
|
def _fetch_via_page(page, url, ref_policy):
    """Fetch *url* through the page's own network stack; return bytes or None.

    Matches the response by URL prefix (query string stripped) and treats
    non-200 statuses and tiny bodies (<=100 bytes, likely error stubs) as
    failures.
    """
    base = url.split("?")[0]
    try:
        with page.expect_response(lambda r: base in r.url, timeout=15000) as resp_info:
            page.evaluate("([u, r]) => fetch(u, { referrerPolicy: r })", [url, ref_policy])
        response = resp_info.value
        if response.status == 200:
            payload = response.body()
            if payload and len(payload) > 100:
                return payload
    except Exception:
        pass
    return None
|
|
|
|
|
|
def fetch_image_bytes(page, img):
    """Fetch image via browser network stack using given page. Tries URL variants on failure."""
    url = img["url"]
    policy = "no-referrer" if img.get("no_referrer") else "origin"

    # Original URL first; then the same URL with its query string stripped
    # (e.g. "?q=50" quality params that sometimes break the CDN).
    candidates = [url]
    if "?" in url:
        candidates.append(url.split("?")[0])

    for candidate in candidates:
        data = _fetch_via_page(page, candidate, policy)
        if data:
            return data
    return None
|
|
|
|
|
|
def download_image(page, img, save_path):
    """Fetch one image and save it; True on success or when the file already exists."""
    if save_path.exists():
        return True
    data = fetch_image_bytes(page, img)
    if not data:
        return False
    save_path.parent.mkdir(parents=True, exist_ok=True)
    save_path.write_bytes(data)
    return True
|
|
|
|
|
|
# ── R2 / Upload ────────────────────────────────────────────
|
|
|
|
|
|
WEBP_QUALITY = 75  # default WEBP encode quality for page images (cover uses 80)
|
|
|
|
|
|
def _to_webp_bytes(img, quality=WEBP_QUALITY, method=6):
    """Encode a PIL image as WEBP (method=6: slowest/best compression) and return bytes."""
    out = io.BytesIO()
    img.save(out, format="WEBP", quality=quality, method=method)
    return out.getvalue()
|
|
|
|
|
|
def convert_to_webp(source, quality=WEBP_QUALITY):
    """Open an image file/stream and return its WEBP-encoded bytes."""
    img = Image.open(source)
    return _to_webp_bytes(img, quality)
|
|
|
|
|
|
def probe_and_webp(source, quality=WEBP_QUALITY):
    """Open once; return (width, height, webp_bytes)."""
    with Image.open(source) as img:
        width, height = img.width, img.height
        return width, height, _to_webp_bytes(img, quality)
|
|
|
|
|
|
def insert_pages(cur, chapter_id, page_urls):
    """page_urls: {page_num: (url, width, height)}. Inserts in page_num order."""
    sql = 'INSERT INTO "Page" ("chapterId", number, "imageUrl", width, height) VALUES (%s, %s, %s, %s, %s)'
    for number in sorted(page_urls):
        url, width, height = page_urls[number]
        cur.execute(sql, (chapter_id, number, url, width, height))
|
|
|
|
|
|
def make_cover(source, width=400, height=560):
    """Crop *source* to the width:height aspect ratio, resize, return WEBP bytes.

    Wide images are center-cropped horizontally; tall images keep the top
    strip (cover art usually sits at the top). Uses a context manager so the
    source file handle is closed promptly — the original `Image.open` was
    never closed and leaked the handle until GC.
    """
    target_ratio = width / height
    with Image.open(source) as src:
        img_ratio = src.width / src.height
        if img_ratio > target_ratio:
            # Wider than target: trim equal margins off left and right.
            new_width = int(src.height * target_ratio)
            left = (src.width - new_width) // 2
            img = src.crop((left, 0, left + new_width, src.height))
        else:
            # Taller than target: keep the top of the image.
            new_height = int(src.width / target_ratio)
            img = src.crop((0, 0, src.width, new_height))
        img = img.resize((width, height), Image.LANCZOS)
        return _to_webp_bytes(img, quality=80)
|
|
|
|
|
|
def upload_to_r2(key, data, content_type="image/webp"):
    """Store *data* in the R2 bucket under *key*; return its public URL."""
    _ensure_config()
    s3.put_object(
        Bucket=BUCKET,
        Key=key,
        Body=data,
        ContentType=content_type,
    )
    return "/".join((PUBLIC_URL, key))
|
|
|
|
|
def r2_key_exists(key):
    """True if *key* is already present in the R2 bucket (cheap HEAD request)."""
    _ensure_config()
    try:
        s3.head_object(Bucket=BUCKET, Key=key)
    except s3.exceptions.ClientError:
        return False
    return True
|
|
|
|
|
|
def get_db():
    """Open a psycopg2 connection to DATABASE_URL with UTF8 client encoding."""
    _ensure_config()
    connection = psycopg2.connect(DATABASE_URL)
    connection.set_client_encoding("UTF8")
    return connection
|
|
|
|
|
|
def parse_chapter_dir(dir_name):
    """Split a chapter dir name "<number> <title>" into (number, title).

    Names that don't start with digits-then-space map to (0, dir_name),
    which callers use to skip non-chapter folders.
    """
    match = re.match(r"^(\d+)\s+(.+)$", dir_name)
    if match is None:
        return 0, dir_name
    return int(match.group(1)), match.group(2)
|
|
|
|
|
|
# ── Helpers ────────────────────────────────────────────────
|
|
|
|
|
|
def load_manga_urls():
    """Read manga.json; return its list of URLs (empty on missing file or non-list JSON)."""
    if not MANGA_JSON.exists():
        return []
    parsed = json.loads(MANGA_JSON.read_text(encoding="utf-8"))
    if isinstance(parsed, list):
        return parsed
    return []
|
|
|
|
|
|
def slug_from_url(url):
    """Return the last path segment of *url* (e.g. .../manga/<slug> -> <slug>)."""
    path = urlparse(url).path
    return path.strip("/").split("/")[-1]
|
|
|
|
|
|
def get_existing_chapters(manga_dir):
    """Names of chapter subdirs that already contain at least one .jpg page."""
    if not manga_dir.exists():
        return set()
    return {
        entry.name
        for entry in manga_dir.iterdir()
        if entry.is_dir() and any(entry.glob("*.jpg"))
    }
|
|
|
|
|
|
def list_local_manga():
    """Sorted names of non-hidden manga directories under CONTENT_DIR (empty if absent)."""
    if not CONTENT_DIR.exists():
        return []
    names = [
        entry.name
        for entry in CONTENT_DIR.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    ]
    return sorted(names)
|
|
|
|
|
|
# ── Core: download manga ──────────────────────────────────
|
|
|
|
|
|
def load_manga_page(session, slug):
    """Navigate to manga page, pass CF, return (chapters, metadata, cover_bytes) or None.

    Registers a response listener before navigating so the /mcover/ image
    bytes are captured from the page's own traffic (the CDN hotlink-protects
    covers, so a separate fetch wouldn't work). Returns None only when the
    Cloudflare challenge never clears; chapters/metadata/cover may each be
    None/empty individually.
    """
    cover_responses = {}  # response URL -> raw bytes for every /mcover/ hit seen

    def on_cover(response):
        if "/mcover/" in response.url and response.status == 200:
            try:
                cover_responses[response.url] = response.body()
            except Exception:
                pass  # body can be unavailable if the page navigated away

    page = session.page
    page.on("response", on_cover)

    print(" Loading manga page...")
    try:
        # wait_until="commit": don't block on full load — CF may interpose.
        page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
    except Exception:
        pass
    hide_chrome()
    if not wait_for_cloudflare(session):
        page = session.page
        try:
            page.remove_listener("response", on_cover)
        except Exception:
            pass
        return None

    page = session.page  # may have changed after CF restart
    print(" Fetching chapters...")
    chapters = fetch_chapters_via_api(page, slug)
    if not chapters:
        print(" API failed, trying DOM...")
        chapters = fetch_chapters_from_dom(page)

    metadata = fetch_metadata(page)

    # Wait for cover image to be present in DOM (up to 8s)
    cover_url = None
    for _ in range(16):
        cover_url = page.evaluate("""
            () => {
                const sels = ['img.mip-fill-content[src*="mcover"]', 'img[src*="/mcover/"]'];
                for (const s of sels) {
                    const img = document.querySelector(s);
                    if (img && img.src) return img.src;
                }
                return null;
            }
        """)
        if cover_url:
            break
        page.wait_for_timeout(500)

    # Give the response another moment to be captured
    if cover_url and cover_url not in cover_responses:
        page.wait_for_timeout(1500)

    try:
        page.remove_listener("response", on_cover)
    except Exception:
        pass

    cover_body = None
    if cover_url:
        cover_body = cover_responses.get(cover_url)
        if not cover_body:
            # Fuzzy match: same URL modulo query string, in either direction.
            for url, data in cover_responses.items():
                if cover_url.split("?")[0] in url or url.split("?")[0] in cover_url:
                    cover_body = data
                    break

    if not cover_body:
        if cover_url:
            print(f" Cover URL found but body not captured ({len(cover_responses)} responses)")
        else:
            print(f" No cover URL found in DOM")

    return chapters, metadata, cover_body
|
|
|
|
|
|
def save_manga_local(slug, metadata, cover_body):
    """Persist metadata (merged into detail.json) and cover.jpg under manga-content/<slug>.

    detail.json is merge-updated, never clobbered, so fields from earlier
    runs survive. The cover is written only once and only when the payload
    is plausibly a real image (>100 bytes).
    """
    manga_dir = CONTENT_DIR / slug
    manga_dir.mkdir(parents=True, exist_ok=True)

    detail_path = manga_dir / "detail.json"
    if metadata:
        merged = {}
        if detail_path.exists():
            try:
                merged = json.loads(detail_path.read_text(encoding="utf-8"))
            except json.JSONDecodeError:
                merged = {}  # corrupt file: start over with fresh metadata
        merged.update(metadata)
        detail_path.write_text(json.dumps(merged, ensure_ascii=False, indent=4), encoding="utf-8")

    cover_path = manga_dir / "cover.jpg"
    if cover_body and len(cover_body) > 100 and not cover_path.exists():
        cover_path.write_bytes(cover_body)
        print(f" Cover saved ({len(cover_body)} bytes)")
|
|
|
|
|
|
def download_chapter(session, slug, chapter_index, chapter, manga_dir):
    """Download a single chapter's images. Returns True if successful.

    Pages are saved as <manga_dir>/"<index> <name>"/<n>.jpg — the folder
    name is later split back apart by parse_chapter_dir(). On a partial
    download the chapter dir is removed only if it is empty (rmdir fails
    silently on a non-empty dir), and False is returned so the caller can
    retry on a later run.
    """
    ch_id = chapter["id"]
    ch_name = chapter["chapterName"]
    folder_name = f"{chapter_index} {ch_name}"  # "<index> <name>" naming contract
    chapter_dir = manga_dir / folder_name

    images, _ = get_chapter_images(session.page, slug, ch_id)
    if not images:
        print(f" No images")
        return False

    print(f" {len(images)} pages")
    chapter_dir.mkdir(parents=True, exist_ok=True)

    page_bytes = fetch_all_pages(session.page, images)
    ok = 0
    for pn, body in page_bytes.items():
        save_path = chapter_dir / f"{pn}.jpg"
        save_path.write_bytes(body)
        ok += 1

    # Trailing spaces overwrite any progress text left on the same line.
    print(f" {ok}/{len(images)} downloaded" + " " * 20)

    if ok < len(images):
        try:
            chapter_dir.rmdir()  # succeeds only when no page was saved
        except Exception:
            pass  # partially-filled dir is kept; skip-detection requires all pages anyway
        return False

    time.sleep(REQUEST_DELAY)  # stay polite between chapters
    return True
|
|
|
|
|
|
# ── Core: upload manga ────────────────────────────────────
|
|
|
|
|
|
def upload_manga_to_r2(manga_name, conn):
    """Upload a local manga to R2 and create DB records.

    Idempotent: R2 objects are reused when they already exist (HEAD check),
    chapters already present in the DB are skipped, and Chapter/Page rows
    are inserted only after every page of that chapter has been uploaded.
    Commits once per chapter so an interrupted run resumes cleanly.
    """
    manga_path = CONTENT_DIR / manga_name
    detail_path = manga_path / "detail.json"

    if not detail_path.exists():
        print(f" Skipping {manga_name}: no detail.json")
        return

    detail = json.loads(detail_path.read_text(encoding="utf-8"))
    title = detail.get("mg-title", manga_name)
    slug = manga_name  # the local directory name doubles as the DB slug
    genres = detail.get("mg-genres", [])
    description = detail.get("mg-description", "")
    genre = ", ".join(genres) if genres else "Drama"

    cur = conn.cursor()

    # Cover: upload once; afterwards just reconstruct its public URL.
    cover_file = manga_path / "cover.jpg"
    cover_url = ""
    cover_key = f"manga/{slug}/cover.webp"
    if cover_file.exists():
        if not r2_key_exists(cover_key):
            cover_url = upload_to_r2(cover_key, make_cover(cover_file))
            print(f" Cover uploaded")
        else:
            cover_url = f"{PUBLIC_URL}/{cover_key}"

    # Manga record: update the cover on an existing row, else insert fresh.
    cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
    row = cur.fetchone()
    if row:
        manga_id, existing_cover = row
        if cover_url and cover_url != existing_cover:
            cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id))
            conn.commit()
    else:
        cur.execute(
            'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
            "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
            (title, description, cover_url, slug, genre),
        )
        manga_id = cur.fetchone()[0]
        conn.commit()
        print(f" Created manga (id: {manga_id})")

    # Chapter dirs are named "<number> <title>"; sort numerically by number.
    chapter_dirs = sorted(
        [d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")],
        key=lambda d: parse_chapter_dir(d.name)[0],
    )

    for chapter_dir in chapter_dirs:
        order_num, chapter_title = parse_chapter_dir(chapter_dir.name)
        if order_num == 0:
            continue  # unparsable name => not a chapter folder

        cur.execute('SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s', (manga_id, order_num))
        if cur.fetchone():
            print(f" [{order_num}] {chapter_title} — skip")
            continue

        # Page files sorted by the first number in the stem (1.jpg, 2.jpg, ...).
        page_files = sorted(
            [f for f in chapter_dir.iterdir() if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")],
            key=lambda f: int(re.search(r"(\d+)", f.stem).group(1)) if re.search(r"(\d+)", f.stem) else 0,
        )
        if not page_files:
            continue

        print(f" [{order_num}] {chapter_title} ({len(page_files)} pages)")

        # Upload to R2 first
        def process_page(args, _slug=slug, _order=order_num):
            # Worker-thread task: convert page to WEBP and upload, or reuse
            # an existing R2 object and just probe local dimensions.
            # _slug/_order are bound as defaults to avoid late-binding bugs.
            j, pf = args
            r2_key = f"manga/{_slug}/chapters/{_order}/{j}.webp"
            if r2_key_exists(r2_key):
                with Image.open(pf) as img:
                    return j, f"{PUBLIC_URL}/{r2_key}", img.width, img.height
            w, h, webp = probe_and_webp(pf)
            return j, upload_to_r2(r2_key, webp), w, h

        page_urls = {}
        done = 0
        with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
            futures = {pool.submit(process_page, (j, f)): j for j, f in enumerate(page_files, 1)}
            for future in as_completed(futures):
                j, url, w, h = future.result()
                page_urls[j] = (url, w, h)
                done += 1
                print(f" {done}/{len(page_files)}", end="\r")

        if not page_urls:
            print(f" Upload failed, skip")
            continue

        # DB records only after R2 upload succeeds
        cur.execute(
            'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
            (manga_id, order_num, chapter_title),
        )
        chapter_id = cur.fetchone()[0]
        insert_pages(cur, chapter_id, page_urls)
        conn.commit()
        print(f" {len(page_files)} pages uploaded" + " " * 10)
|
|
|
|
|
|
# ── Commands ───────────────────────────────────────────────
|
|
|
|
|
|
def cmd_setup():
    """Open Chrome so the user can solve Cloudflare challenges interactively.

    Navigates to BASE_URL, raises the Chrome window (macOS only — osascript
    does not exist elsewhere), waits for the user to confirm, then reports
    whether a cf_clearance cookie was captured and hides Chrome again
    (hide_chrome is a no-op off macOS).
    """
    print("\n Chrome will open. Solve Cloudflare on:")
    print(" 1. m.happymh.com")
    print(" 2. Any manga page")
    print(" 3. Any reader page\n")

    session = get_session()
    try:
        # "commit" is enough — a Cloudflare interstitial would make a
        # full "load" wait time out, and we only need navigation started.
        session.page.goto(BASE_URL, wait_until="commit", timeout=60000)
    except Exception:
        pass

    # Bring Chrome to front for setup. osascript is macOS-only, so guard
    # explicitly instead of relying on FileNotFoundError being swallowed.
    if IS_MACOS:
        try:
            subprocess.Popen(
                ["osascript", "-e", 'tell application "Google Chrome" to activate'],
                stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
            )
        except Exception:
            pass

    input(" Press ENTER when done... ")

    cookies = session.browser.contexts[0].cookies()
    cf = [c for c in cookies if c["name"] == "cf_clearance"]
    print(f" cf_clearance: {'found' if cf else 'NOT found'}")
    hide_chrome()
    print()
|
|
|
|
|
|
def cmd_download(manga_url=None, chapter_set=None):
    """Download manga to local manga-content/.

    manga_url: a single manga URL, or None to process every URL in manga.json.
    chapter_set: a set of 1-based chapter indices to download, or None for all.
    ESC (via EscListener) aborts between chapters/manga.
    """
    urls = [manga_url] if manga_url else load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return

    print(f"\n Downloading {len(urls)} manga(s)... (ESC to stop)\n")

    def run(session):
        with EscListener() as esc:
            for url in urls:
                if esc.stop.is_set():
                    break
                slug = slug_from_url(url)
                try:
                    result = load_manga_page(session, slug)
                    if not result:
                        continue
                    chapters, metadata, cover_body = result
                    if not chapters:
                        print(" No chapters found.")
                        continue
                    print(f" Found {len(chapters)} chapters")
                    save_manga_local(slug, metadata, cover_body)

                    # Directory names of chapters already downloaded to disk.
                    existing = get_existing_chapters(CONTENT_DIR / slug)

                    for i, ch in enumerate(chapters, 1):
                        if esc.stop.is_set():
                            break
                        if chapter_set and i not in chapter_set:
                            continue
                        # Skip via substring match: local dir names embed
                        # the chapter name.
                        if any(ch["chapterName"] in name for name in existing):
                            print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip")
                            continue
                        print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})")
                        download_chapter(session, slug, i, ch, CONTENT_DIR / slug)
                    print(f"\n Done: {slug}")
                except Exception as e:
                    # One failing manga must not abort the whole batch.
                    print(f"\n Error: {url}: {e}")
                    import traceback
                    traceback.print_exc()

    with_browser(run)
    print("\nDownload complete!")
|
|
|
|
|
|
def cmd_upload(manga_name=None):
    """Upload locally-downloaded manga to R2 + DB.

    manga_name: one directory name under manga-content/, or None to upload
    every local manga. ESC aborts between manga.
    """
    names = [manga_name] if manga_name else list_local_manga()
    if not names:
        print(" No manga in manga-content/")
        return

    print(f"\n Uploading {len(names)} manga(s)... (ESC to stop)")
    conn = get_db()
    try:
        with EscListener() as esc:
            banner = "=" * 50
            for name in names:
                if esc.stop.is_set():
                    break
                print(f"\n {banner}")
                print(f" {name}")
                print(f" {banner}")
                upload_manga_to_r2(name, conn)
    finally:
        # Shared DB connection is closed even on error/abort.
        conn.close()
    print("\nUpload complete!")
|
|
|
|
|
|
def cmd_sync(manga_url=None):
    """Sync: fetch latest chapters, stream directly to R2 (no local save).

    manga_url: a single manga URL, or None to sync every URL in manga.json.
    Per manga: refresh metadata/cover in the DB, then download and upload any
    chapter whose number is missing from the Chapter table. DB rows are only
    written after all of a chapter's pages are in R2. ESC aborts cleanly.
    """
    urls = [manga_url] if manga_url else load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return

    conn = get_db()

    def run(session):
        with EscListener() as esc:
            for url in urls:
                if esc.stop.is_set():
                    break
                slug = slug_from_url(url)

                print(f"\n{'='*60}")
                print(f"Syncing: {slug}")
                print(f"{'='*60}")

                # 1. Load manga page + get chapters
                result = load_manga_page(session, slug)
                if not result:
                    continue
                chapters, metadata, cover_body = result
                if not chapters:
                    print(" No chapters found.")
                    continue
                print(f" {len(chapters)} chapters on site")

                # 2. Ensure manga in DB
                cur = conn.cursor()
                title = metadata.get("mg-title", slug)
                genres = metadata.get("mg-genres", [])
                description = metadata.get("mg-description", "")
                genre = ", ".join(genres) if genres else "Drama"

                # Cover → R2 (from RAM; >100 bytes guards against error bodies)
                cover_url = ""
                cover_key = f"manga/{slug}/cover.webp"
                if cover_body and len(cover_body) > 100:
                    if not r2_key_exists(cover_key):
                        cover_webp = make_cover(io.BytesIO(cover_body))
                        cover_url = upload_to_r2(cover_key, cover_webp)
                        print(f" Cover uploaded to R2")
                    else:
                        cover_url = f"{PUBLIC_URL}/{cover_key}"

                cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (slug,))
                row = cur.fetchone()
                if row:
                    manga_id = row[0]
                    # Refresh metadata fields (cover only updated if we have a new one)
                    if cover_url:
                        cur.execute(
                            'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
                            '"coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                            (title, description, genre, cover_url, manga_id),
                        )
                    else:
                        cur.execute(
                            'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
                            '"updatedAt" = NOW() WHERE id = %s',
                            (title, description, genre, manga_id),
                        )
                    conn.commit()
                    print(f" Updated metadata (genre: {genre})")
                else:
                    cur.execute(
                        'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
                        "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
                        (title, description, cover_url, slug, genre),
                    )
                    manga_id = cur.fetchone()[0]
                    conn.commit()
                    print(f" Created manga in DB (id: {manga_id})")

                # 3. Find chapters missing from DB
                cur.execute('SELECT number FROM "Chapter" WHERE "mangaId" = %s', (manga_id,))
                existing_numbers = {row[0] for row in cur.fetchall()}

                # 4. Collect chapters to sync (1-based site index == chapter number)
                todo = [(i, ch) for i, ch in enumerate(chapters, 1) if i not in existing_numbers]

                if not todo:
                    print(" Already up to date!")
                    continue

                print(f" {len(todo)} new chapters to sync")

                completed = 0
                skipped = 0
                for i, ch in todo:
                    if esc.stop.is_set():
                        break
                    ch_name = ch["chapterName"]
                    print(f" [{i}/{len(chapters)}] {ch_name} (id={ch['id']})")

                    images, api_info = get_chapter_images(session.page, slug, ch["id"])
                    if not images and api_info.get("status") == 403:
                        # Cloudflare clearance expired — abort the whole run.
                        print(f" CF blocked — run Setup and try again")
                        esc.stop.set()
                        break
                    if not images:
                        print(f" No images")
                        skipped += 1
                        continue

                    print(f" {len(images)} pages")
                    page_bytes = fetch_all_pages(session.page, images)
                    if len(page_bytes) < len(images):
                        # Incomplete chapter: never upload partial data.
                        missing = [pn for pn in range(1, len(images) + 1) if pn not in page_bytes]
                        print(f" Could not fetch pages: {missing}, skipping chapter")
                        skipped += 1
                        continue

                    # Bind slug/i as defaults so the worker closure is stable
                    # across loop iterations.
                    def upload_one(args, _slug=slug, _i=i):
                        pn, raw = args
                        r2_key = f"manga/{_slug}/chapters/{_i}/{pn}.webp"
                        w, h, webp = probe_and_webp(io.BytesIO(raw))
                        return pn, upload_to_r2(r2_key, webp), w, h

                    page_urls = {}
                    done = 0
                    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
                        for pn, r2_url, w, h in pool.map(upload_one, page_bytes.items()):
                            page_urls[pn] = (r2_url, w, h)
                            done += 1
                            print(f" R2: {done}/{len(page_bytes)}", end="\r")

                    if not page_urls:
                        skipped += 1
                        continue

                    # DB rows only after all pages are safely in R2.
                    cur.execute(
                        'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
                        (manga_id, i, ch_name),
                    )
                    chapter_id = cur.fetchone()[0]
                    insert_pages(cur, chapter_id, page_urls)
                    conn.commit()
                    completed += 1
                    print(f" {len(page_urls)} pages synced" + " " * 20)
                    time.sleep(REQUEST_DELAY)

                print(f" Synced {completed}/{len(todo)} chapters ({skipped} skipped)")

    try:
        with_browser(run)
    finally:
        conn.close()

    print("\nSync complete!")
|
|
|
|
|
|
def r2_list_prefixes():
    """List manga slugs in R2 by scanning top-level prefixes under manga/."""
    _ensure_config()
    found = set()
    pages = s3.get_paginator("list_objects_v2").paginate(
        Bucket=BUCKET, Prefix="manga/", Delimiter="/"
    )
    for page in pages:
        for entry in page.get("CommonPrefixes", []):
            # "manga/<slug>/" -> "<slug>"
            name = entry["Prefix"].split("/")[1]
            if name:
                found.add(name)
    return sorted(found)
|
|
|
|
|
|
def r2_count_by_prefix(prefix):
    """Count objects under a prefix."""
    _ensure_config()
    paginator = s3.get_paginator("list_objects_v2")
    # Sum page-by-page; "Contents" is absent on empty pages.
    return sum(
        len(page.get("Contents", []))
        for page in paginator.paginate(Bucket=BUCKET, Prefix=prefix)
    )
|
|
|
|
|
|
def r2_delete_prefix(prefix):
    """Delete all objects under a prefix. Returns the number deleted."""
    _ensure_config()
    # Collect keys page-by-page; each listing page (<=1000 keys) becomes one
    # delete_objects batch, which stays within the API's 1000-key limit.
    batches = []
    for page in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix):
        contents = page.get("Contents", [])
        if contents:
            batches.append([{"Key": entry["Key"]} for entry in contents])

    def delete_batch(keys):
        s3.delete_objects(Bucket=BUCKET, Delete={"Objects": keys})
        return len(keys)

    deleted = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for n in pool.map(delete_batch, batches):
            deleted += n
            print(f" {deleted} deleted", end="\r")
    print(f" {deleted} objects deleted" + " " * 10)
    return deleted
|
|
|
|
|
|
def r2_recompress(slug, quality=65):
    """Download all webp images for a manga, re-encode at lower quality, re-upload."""
    _ensure_config()
    prefix = f"manga/{slug}/"
    keys = [
        obj["Key"]
        for page in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix)
        for obj in page.get("Contents", [])
        if obj["Key"].endswith(".webp")
    ]

    if not keys:
        print(f" No webp files for {slug}")
        return

    print(f" {len(keys)} files to recompress (quality={quality})")

    def recompress_one(key):
        # Returns bytes saved (>= 0) on success, -1 on failure.
        try:
            original = s3.get_object(Bucket=BUCKET, Key=key)["Body"].read()
            new_data = _to_webp_bytes(Image.open(io.BytesIO(original)), quality=quality)
            saved = len(original) - len(new_data)
            if saved > 0:
                # Only overwrite when re-encoding actually shrank the object.
                s3.put_object(Bucket=BUCKET, Key=key, Body=new_data, ContentType="image/webp")
                return saved
            return 0
        except Exception:
            return -1

    done = 0
    failed = 0
    saved_total = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for saved in pool.map(recompress_one, keys):
            done += 1
            if saved < 0:
                failed += 1
            else:
                saved_total += saved
            print(f" {done}/{len(keys)} — saved {saved_total // 1024} KB", end="\r")

    msg = f" Done: {done}/{len(keys)} processed, {saved_total // (1024 * 1024)} MB saved"
    if failed:
        msg += f" ({failed} failed)"
    print(msg + " " * 10)
|
|
|
|
|
|
# ── TUI ────────────────────────────────────────────────────
|
|
|
|
|
|
def tui_select(title, options, back=True, search=False):
    """Arrow-key menu. Returns selected index or -1."""
    entries = list(options)
    if back:
        entries.append("[Back]")
    chosen = TerminalMenu(
        entries,
        title=title,
        search_key="/" if search else None,
        show_search_hint=search,
    ).show()
    # ESC (None) and selecting the appended "[Back]" entry both mean "back".
    last = len(entries) - 1
    if chosen is None or (back and chosen == last):
        return -1
    return chosen
|
|
|
|
|
|
_title_cache = {}  # slug -> display title, shared across all lookups


def get_manga_title(slug):
    """Read manga title from detail.json or DB, fallback to slug."""
    if slug in _title_cache:
        return _title_cache[slug]

    # 1) Local detail.json, present when the manga was downloaded.
    detail_path = CONTENT_DIR / slug / "detail.json"
    if detail_path.exists():
        try:
            detail = json.loads(detail_path.read_text(encoding="utf-8"))
            title = detail.get("mg-title")
            if title:
                _title_cache[slug] = title
                return title
        except Exception:
            pass

    # 2) Database — batch-load every title so later lookups hit the cache.
    try:
        conn = get_db()
        cur = conn.cursor()
        cur.execute('SELECT slug, title FROM "Manga"')
        for db_slug, db_title in cur.fetchall():
            _title_cache[db_slug] = db_title
        conn.close()
        if slug in _title_cache:
            return _title_cache[slug]
    except Exception:
        pass

    # 3) No title anywhere: display the slug itself.
    return slug
|
|
|
|
|
|
def manga_display_name(slug):
    """Format: 'title (slug)' or just 'slug'."""
    title = get_manga_title(slug)
    return slug if title == slug else f"{title} ({slug})"
|
|
|
|
|
|
def tui_pick_manga_url(include_all=True):
    """Pick manga from manga.json. Shows title + slug.

    Returns the picked URL, "__all__" when "All manga" is chosen, or None
    when there is nothing to pick or the user backs out.
    """
    urls = load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return None

    slugs = [slug_from_url(u) for u in urls]
    entries = (["All manga"] if include_all else []) + [
        f"{n}. {manga_display_name(s)}" for n, s in enumerate(slugs, 1)
    ]
    choice = tui_select("Select manga (/ to search):", entries, search=True)
    if choice < 0:
        return None
    if include_all:
        # Index 0 is the "All manga" entry; real URLs are shifted by one.
        return "__all__" if choice == 0 else urls[choice - 1]
    return urls[choice]
|
|
|
|
|
|
def tui_pick_local(include_all=True):
    """Pick from local manga-content/. Shows title + slug.

    Returns the directory name, "__all__", or None when empty/backed out.
    """
    local = list_local_manga()
    if not local:
        print(" No manga in manga-content/")
        return None

    entries = (["All manga"] if include_all else []) + [
        f"{n}. {manga_display_name(name)}" for n, name in enumerate(local, 1)
    ]
    choice = tui_select("Select manga (/ to search):", entries, search=True)
    if choice < 0:
        return None
    if include_all:
        # Index 0 is the "All manga" entry; real names are shifted by one.
        return "__all__" if choice == 0 else local[choice - 1]
    return local[choice]
|
|
|
|
|
|
def tui_pick_r2():
    """Pick manga from R2. Shows title + slug. Returns slug or None."""
    slugs = r2_list_prefixes()
    if not slugs:
        print(" R2 is empty")
        return None
    entries = [f"{n}. {manga_display_name(s)}" for n, s in enumerate(slugs, 1)]
    choice = tui_select("Select manga (/ to search):", entries, search=True)
    return None if choice < 0 else slugs[choice]
|
|
|
|
|
|
def tui_pick_chapters(chapters, slug=None):
    """Multi-select chapter picker. Space to toggle, Enter to confirm.

    Already-downloaded chapters are shown grayed out. Returns a set of
    selected 1-based indices, None meaning "all chapters", or the string
    "back" when the user backs out."""
    # Local chapter directory names (empty set when no slug given).
    existing = get_existing_chapters(CONTENT_DIR / slug) if slug else set()

    def is_downloaded(ch):
        # Local dir names embed the chapter name, so substring-match.
        return any(ch["chapterName"] in name for name in existing)

    existing_count = sum(1 for ch in chapters if is_downloaded(ch))

    scope = tui_select(f"{len(chapters)} chapters ({existing_count} downloaded)", [
        "All chapters (skip existing)",
        "Select chapters (space to toggle)",
    ])
    if scope == -1:
        return "back"
    if scope == 0:
        return None  # all

    labels = []
    for n, ch in enumerate(chapters, 1):
        label = f"{n}. {ch['chapterName']}"
        if is_downloaded(ch):
            # ANSI dim-gray + "[done]" tag for chapters already on disk.
            label = f"\033[90m{label} [done]\033[0m"
        labels.append(label)

    picked = TerminalMenu(
        labels,
        title="Space=toggle, Enter=confirm, /=search:",
        multi_select=True,
        show_multi_select_hint=True,
        search_key="/",
        show_search_hint=True,
    ).show()
    if picked is None:
        return "back"
    if isinstance(picked, int):
        # A single selection comes back as a bare int; normalize to tuple.
        picked = (picked,)
    return {idx + 1 for idx in picked}  # 1-based
|
|
|
|
|
|
def tui_download():
    """Interactive download: pick a manga, then chapters, then run."""
    picked = tui_pick_manga_url()
    if not picked:
        return
    if picked == "__all__":
        cmd_download()
        return

    slug = slug_from_url(picked)
    print(f"\n Fetching chapters for {slug}...")

    def get_chapters(session):
        # Land on the manga page first; navigation failures are tolerated
        # because the Cloudflare wait below decides whether we can proceed.
        try:
            session.page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
        except Exception:
            pass
        if not wait_for_cloudflare(session):
            return None
        return fetch_chapters_via_api(session.page, slug)

    chapters = with_browser(get_chapters)
    if not chapters:
        print(" Could not get chapters")
        return

    selection = tui_pick_chapters(chapters, slug=slug)
    if selection == "back":
        return
    cmd_download(picked, chapter_set=selection)
|
|
|
|
|
|
def tui_upload():
    """Interactive upload: pick a local manga (or all) and push to R2."""
    picked = tui_pick_local()
    if not picked:
        return
    # "__all__" maps to the no-argument (upload everything) form.
    cmd_upload(None if picked == "__all__" else picked)
|
|
|
|
|
|
def tui_sync():
    """Interactive sync: pick a manga URL (or all) and stream to R2."""
    picked = tui_pick_manga_url()
    if not picked:
        return
    # "__all__" maps to the no-argument (sync everything) form.
    cmd_sync(None if picked == "__all__" else picked)
|
|
|
|
|
|
def tui_edit_manga():
    """Edit manga metadata (title, description, genre, status) in DB.

    Fields are edited into local variables and only written back to the DB
    on "Save & exit"; any other exit discards the changes.
    """
    try:
        conn = get_db()
        cur = conn.cursor()
        cur.execute('SELECT slug, title FROM "Manga" ORDER BY title')
        rows = cur.fetchall()
    except Exception as e:
        print(f" DB error: {e}")
        return

    if not rows:
        print(" No manga in DB")
        conn.close()
        return

    items = [f"{i+1}. {title} ({slug})" for i, (slug, title) in enumerate(rows)]
    sel = tui_select("Select manga to edit (/ to search):", items, search=True)
    if sel < 0:
        conn.close()
        return

    slug, _ = rows[sel]
    cur.execute('SELECT id, title, description, genre, status, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
    row = cur.fetchone()
    if not row:
        print(" Not found")
        conn.close()
        return
    mid, title, description, genre, status, cover_url = row

    while True:
        print(f"\n Editing: {slug}")
        print(f" title: {title}")
        print(f" description: {(description or '')[:80]}{'...' if description and len(description) > 80 else ''}")
        print(f" genre: {genre}")
        print(f" status: {status}")
        print(f" coverUrl: {cover_url}")

        # Menu indices: 0-4 = fields, 5 = save, 6 = discard, -1 = back.
        idx = tui_select("Edit field", [
            "title", "description", "genre", "status", "coverUrl",
            "Save & exit", "Discard & exit",
        ])
        if idx == -1 or idx == 6:
            print(" Discarded.")
            break
        if idx == 5:
            # Persist all pending edits in a single UPDATE.
            cur.execute(
                'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
                'status = %s, "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                (title, description, genre, status, cover_url, mid),
            )
            conn.commit()
            print(" Saved.")
            break

        if idx == 3:  # status: fixed options, not free text
            opts = ["PUBLISHED", "DRAFT", "HIDDEN"]
            s_idx = tui_select("Status:", opts)
            if s_idx >= 0:
                status = opts[s_idx]
        else:
            # Free-text fields. idx 3 ("status") never reaches this branch,
            # so the lookup dict below deliberately omits it.
            field_name = ["title", "description", "genre", "status", "coverUrl"][idx]
            current = {"title": title, "description": description or "", "genre": genre, "coverUrl": cover_url or ""}[field_name]
            print(f" Current: {current}")
            new_val = input(f" New {field_name} (empty=keep): ").strip()
            if new_val:
                if idx == 0: title = new_val
                elif idx == 1: description = new_val
                elif idx == 2: genre = new_val
                elif idx == 4: cover_url = new_val

    conn.close()
|
|
|
|
|
|
def _pick_manga_and_chapters(conn, prompt="Select chapters", multi=True):
    """Helper: pick manga from DB, then pick chapter(s).

    Returns (slug, [(ch_id, ch_num, ch_title), ...]) or None when the DB is
    empty or the user backs out at any step. With multi=True the user can
    choose "All chapters" or a multi-select; with multi=False exactly one
    chapter is picked.
    """
    cur = conn.cursor()
    cur.execute('SELECT id, slug, title FROM "Manga" ORDER BY title')
    mangas = cur.fetchall()
    if not mangas:
        print(" No manga in DB")
        return None

    items = [f"{i+1}. {title} ({slug})" for i, (_, slug, title) in enumerate(mangas)]
    sel = tui_select("Select manga (/ to search):", items, search=True)
    if sel < 0:
        return None
    manga_id, slug, _ = mangas[sel]

    cur.execute('SELECT id, number, title FROM "Chapter" WHERE "mangaId" = %s ORDER BY number', (manga_id,))
    chapters = cur.fetchall()
    if not chapters:
        print(" No chapters in DB for this manga")
        return None

    if multi:
        # Offer an "all" shortcut before the multi-select menu.
        scope = tui_select(f"{prompt}: {len(chapters)} chapters", [
            "All chapters",
            "Select specific chapters",
        ])
        if scope == -1:
            return None
        if scope == 0:
            return slug, list(chapters)

        items = [f"{num}. {title}" for _, num, title in chapters]
        menu = TerminalMenu(
            items,
            title="Space=toggle, Enter=confirm, /=search:",
            multi_select=True,
            show_multi_select_hint=True,
            search_key="/",
            show_search_hint=True,
        )
        selected = menu.show()
        if not selected:
            return None
        if isinstance(selected, int):
            # A single selection comes back as a bare int; normalize.
            selected = (selected,)
        picked = [chapters[i] for i in selected]
    else:
        # Single-select mode.
        items = [f"{num}. {title}" for _, num, title in chapters]
        sel = tui_select(f"{prompt} (/ to search):", items, search=True)
        if sel < 0:
            return None
        picked = [chapters[sel]]

    return slug, picked
|
|
|
|
|
|
def tui_delete_chapter():
    """Delete specific chapter(s) from R2 + DB."""
    try:
        conn = get_db()
    except Exception as e:
        print(f" DB error: {e}")
        return
    try:
        picked = _pick_manga_and_chapters(conn, "Select chapters to delete")
        if not picked:
            return
        slug, to_delete = picked

        confirm = input(f" Delete {len(to_delete)} chapter(s) from R2 + DB? [y/N] ").strip().lower()
        if confirm != "y":
            print(" Cancelled.")
            return

        cur = conn.cursor()
        for ch_id, ch_num, ch_title in to_delete:
            print(f" Deleting [{ch_num}] {ch_title}...")
            # R2 objects first, then DB rows (Page rows before the Chapter).
            r2_delete_prefix(f"manga/{slug}/chapters/{ch_num}/")
            cur.execute('DELETE FROM "Page" WHERE "chapterId" = %s', (ch_id,))
            cur.execute('DELETE FROM "Chapter" WHERE id = %s', (ch_id,))
        # Single commit covering every deleted chapter.
        conn.commit()
        print(f" Done.")
    finally:
        conn.close()
|
|
|
|
|
|
def tui_check_missing_pages():
    """Check selected chapters against the site's actual page count and re-upload if mismatched.

    Two repair paths per chapter:
      * page-count mismatch (site vs R2): re-fetch all pages from the site
        and re-upload them inline, replacing the chapter's Page rows;
      * counts match but Page rows lack width/height: read the bytes back
        from R2 and fill in the dimensions (no re-upload).
    """
    try:
        conn = get_db()
    except Exception as e:
        print(f" DB error: {e}")
        return

    try:
        result = _pick_manga_and_chapters(conn, "Select chapters to check")
        if not result:
            return
        slug, selected_chapters = result

        # Re-fetching pages requires the site URL from manga.json.
        if slug not in [slug_from_url(u) for u in load_manga_urls()]:
            print(f" {slug} not in manga.json — cannot re-fetch pages")
            return
    except Exception:
        conn.close()
        raise

    # Load reader pages and compare site's actual page count vs R2
    def run(session):
        with EscListener() as esc:
            result = load_manga_page(session, slug)
            if not result:
                return
            chapters, _, _ = result
            if not chapters:
                return

            cur2 = conn.cursor()
            fixed_dims = 0
            reuploaded = 0

            print(f"\n Checking {len(selected_chapters)} chapters...")
            for ch_id, ch_num, ch_title in selected_chapters:
                if esc.stop.is_set():
                    break
                # DB chapter numbers map 1:1 onto the site's chapter list.
                if ch_num > len(chapters):
                    print(f" [{ch_num}] {ch_title}: out of range on site")
                    continue

                ch = chapters[ch_num - 1]
                images, api_info = get_chapter_images(session.page, slug, ch["id"])
                if not images:
                    if api_info.get("status") == 403:
                        # Cloudflare clearance expired — abort the run.
                        print(f" [{ch_num}] CF blocked — run Setup")
                        esc.stop.set()
                        break
                    print(f" [{ch_num}] {ch_title}: no images from site")
                    continue

                site_count = len(images)
                r2_count = r2_count_by_prefix(f"manga/{slug}/chapters/{ch_num}/")

                if site_count != r2_count:
                    print(f" [{ch_num}] {ch_title}: site={site_count}, R2={r2_count} — re-uploading...")
                    # Re-upload IMMEDIATELY while browser is on this chapter's reader page
                    page_bytes = fetch_all_pages(session.page, images)
                    if len(page_bytes) < len(images):
                        # Partial fetch: report which pages failed, skip.
                        missing = [pn for pn in range(1, len(images) + 1) if pn not in page_bytes]
                        print(f" Could not fetch pages: {missing}")
                        for mn in missing:
                            print(f" page {mn}: {images[mn-1]['url']}")
                        print(f" Skipping chapter")
                        continue

                    # Bind slug/ch_num as defaults so the worker closure is
                    # stable across loop iterations.
                    def upload_page(args, _slug=slug, _n=ch_num):
                        pn, raw = args
                        r2_key = f"manga/{_slug}/chapters/{_n}/{pn}.webp"
                        with Image.open(io.BytesIO(raw)) as img:
                            w, h = img.width, img.height
                        return pn, upload_to_r2(r2_key, convert_to_webp(io.BytesIO(raw))), w, h

                    page_urls = {}
                    done = 0
                    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
                        for pn, r2_url, w, h in pool.map(upload_page, page_bytes.items()):
                            page_urls[pn] = (r2_url, w, h)
                            done += 1
                            print(f" R2: {done}/{len(page_bytes)}", end="\r")

                    # Replace the chapter's Page rows wholesale.
                    cur2.execute('DELETE FROM "Page" WHERE "chapterId" = %s', (ch_id,))
                    for pn in sorted(page_urls):
                        url, w, h = page_urls[pn]
                        cur2.execute(
                            'INSERT INTO "Page" ("chapterId", number, "imageUrl", width, height) VALUES (%s, %s, %s, %s, %s)',
                            (ch_id, pn, url, w, h),
                        )
                    conn.commit()
                    reuploaded += 1
                    print(f" {len(page_urls)} pages restored" + " " * 20)
                    continue

                # Count matches — check if DB has valid width/height for all pages
                cur2.execute(
                    'SELECT COUNT(*), '
                    'COUNT(*) FILTER (WHERE width IS NULL OR width <= 0), '
                    'COUNT(*) FILTER (WHERE height IS NULL OR height <= 0), '
                    'MIN(width), MAX(width), MIN(height), MAX(height) '
                    'FROM "Page" WHERE "chapterId" = %s',
                    (ch_id,),
                )
                db_count, bad_w, bad_h, min_w, max_w, min_h, max_h = cur2.fetchone()
                bad_count = max(bad_w, bad_h)
                if bad_count > 0:
                    # Dim-only fix: read bytes back from R2, no re-upload.
                    print(f" [{ch_num}] {ch_title}: {bad_count} pages need dims — fixing from R2...")
                    cur2.execute(
                        'SELECT id, number FROM "Page" WHERE "chapterId" = %s '
                        'AND (width IS NULL OR width = 0 OR height IS NULL OR height = 0) '
                        'ORDER BY number',
                        (ch_id,),
                    )
                    pages = cur2.fetchall()

                    def read_dims(args, _slug=slug, _n=ch_num):
                        # Probe one page object in R2; (None, None) on failure.
                        page_id, pn = args
                        r2_key = f"manga/{_slug}/chapters/{_n}/{pn}.webp"
                        try:
                            data = s3.get_object(Bucket=BUCKET, Key=r2_key)["Body"].read()
                            with Image.open(io.BytesIO(data)) as img:
                                return page_id, img.width, img.height
                        except Exception:
                            return page_id, None, None

                    updated = 0
                    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
                        for page_id, w, h in pool.map(read_dims, pages):
                            if w and h:
                                cur2.execute(
                                    'UPDATE "Page" SET width = %s, height = %s WHERE id = %s',
                                    (w, h, page_id),
                                )
                                updated += 1
                    conn.commit()
                    fixed_dims += 1
                    print(f" {updated}/{len(pages)} dims updated")
                else:
                    print(f" [{ch_num}] {ch_title}: {site_count} pages OK (w {min_w}-{max_w}, h {min_h}-{max_h})")

            print(f"\n Done: {reuploaded} re-uploaded, {fixed_dims} dim-fixed")

    try:
        with_browser(run)
    finally:
        conn.close()

    print("\nCheck complete!")
|
|
|
|
|
|
def tui_r2_manage():
    """R2 / DB management submenu loop; runs until [Back] is chosen."""
    while True:
        idx = tui_select("R2 / DB Management", [
            "Status",
            "Edit manga info",
            "Delete specific manga",
            "Delete specific chapter",
            "Check missing pages",
            "Clear ALL (R2 + DB)",
            "Recompress manga (quality 65)",
        ])
        if idx == -1:
            break

        elif idx == 0:
            # Status: per-manga R2 object counts, then DB row counts.
            _ensure_config()
            slug_counts = {}
            total = 0
            for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET):
                for obj in pg.get("Contents", []):
                    total += 1
                    parts = obj["Key"].split("/")
                    if len(parts) >= 2 and parts[0] == "manga":
                        slug_counts[parts[1]] = slug_counts.get(parts[1], 0) + 1
            print(f"\n R2: {total} objects, {len(slug_counts)} manga")
            for slug in sorted(slug_counts):
                print(f" {manga_display_name(slug)}: {slug_counts[slug]} objects")
            try:
                conn = get_db()
                cur = conn.cursor()
                cur.execute('SELECT COUNT(*) FROM "Manga"')
                mc = cur.fetchone()[0]
                cur.execute('SELECT COUNT(*) FROM "Chapter"')
                cc = cur.fetchone()[0]
                cur.execute('SELECT COUNT(*) FROM "Page"')
                pc = cur.fetchone()[0]
                print(f" DB: {mc} manga, {cc} chapters, {pc} pages")
                conn.close()
            except Exception as e:
                print(f" DB: {e}")
            input("\n Press ENTER...")

        elif idx == 1:
            tui_edit_manga()

        elif idx == 2:
            # Delete one manga entirely: R2 objects, then DB rows.
            picked = tui_pick_r2()
            if not picked:
                continue
            confirm = input(f" Delete {picked} from R2 + DB? [y/N] ").strip().lower()
            if confirm == "y":
                r2_delete_prefix(f"manga/{picked}/")
                try:
                    conn = get_db()
                    cur = conn.cursor()
                    cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (picked,))
                    row = cur.fetchone()
                    if row:
                        mid = row[0]
                        # Children first: Page -> Chapter -> Manga.
                        cur.execute('DELETE FROM "Page" WHERE "chapterId" IN (SELECT id FROM "Chapter" WHERE "mangaId" = %s)', (mid,))
                        cur.execute('DELETE FROM "Chapter" WHERE "mangaId" = %s', (mid,))
                        cur.execute('DELETE FROM "Manga" WHERE id = %s', (mid,))
                        conn.commit()
                        print(f" Removed from R2 + DB")
                    conn.close()
                except Exception as e:
                    print(f" DB error: {e}")

        elif idx == 3:
            tui_delete_chapter()

        elif idx == 4:
            tui_check_missing_pages()

        elif idx == 5:
            # Wipe everything (irreversible) after explicit confirmation.
            confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower()
            if confirm == "y":
                r2_delete_prefix("")
                try:
                    conn = get_db()
                    cur = conn.cursor()
                    for t in ['"Page"', '"Chapter"', '"Manga"']:
                        cur.execute(f"DELETE FROM {t}")
                    conn.commit()
                    conn.close()
                    print(" All cleared")
                except Exception as e:
                    print(f" DB error: {e}")

        elif idx == 6:
            # Recompress: lossily shrink existing webp objects in place.
            slugs = r2_list_prefixes()
            if not slugs:
                print(" R2 is empty")
                continue
            items = ["All manga"] + [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]
            sel = tui_select("Recompress which? (quality=65, overwrites originals)", items, search=True)
            if sel < 0:
                continue
            targets = slugs if sel == 0 else [slugs[sel - 1]]
            confirm = input(f" Recompress {len(targets)} manga to quality 65? [y/N] ").strip().lower()
            if confirm != "y":
                continue
            for slug in targets:
                print(f"\n {manga_display_name(slug)}")
                r2_recompress(slug, quality=65)
|
|
|
|
|
|
def main():
    """Top-level TUI loop; always closes the browser session on exit."""
    actions = {
        0: cmd_setup,
        1: tui_download,
        2: tui_upload,
        3: tui_sync,
        4: tui_r2_manage,
    }
    try:
        while True:
            choice = tui_select("Manga Toolkit", [
                "Setup (solve Cloudflare)",
                "Download",
                "Upload (local -> R2)",
                "Sync (site -> R2)",
                "R2 / DB management",
                "Quit",
            ], back=False)

            # ESC/back (None or -1) and "Quit" (5) all end the loop.
            if choice is None or choice == -1 or choice == 5:
                break
            handler = actions.get(choice)
            if handler is not None:
                handler()
    finally:
        close_session()
        print("Bye!")
|
|
|
|
|
|
# Script entry point: run the interactive TUI.
if __name__ == "__main__":
    main()
|