- Single interactive script (arrow-key TUI via simple-term-menu) replaces download.py, upload.py, and export_cookies.py - Add sync command: streams new chapters site -> R2 directly without saving locally (uses RAM as cache) - Add R2/DB management submenu (status, delete specific, clear all) - Multi-select chapter picker with already-downloaded marked grayed out - Chapter list fetched via /v2.0/apis/manga/chapterByPage with pagination - Cover image captured from page network traffic (no extra fetch) - Filter prefetched next-chapter images via DOM container count - Chrome runs hidden via AppleScript on macOS (except setup mode) - DB records only created after R2 upload succeeds (no orphan rows) - Parallel R2 uploads (8 workers) with WebP method=6 quality=75 - Update CLAUDE.md to reflect new architecture - Add requirements.txt Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1482 lines
49 KiB
Python
1482 lines
49 KiB
Python
"""
|
|
Manga toolkit — download from m.happymh.com, upload to Cloudflare R2.
|
|
|
|
Usage:
|
|
python manga.py
|
|
"""
|
|
|
|
import io
|
|
import json
|
|
import os
|
|
import re
|
|
import time
|
|
import socket
|
|
import subprocess
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
from pathlib import Path
|
|
from urllib.parse import urlparse
|
|
|
|
import boto3
|
|
import psycopg2
|
|
from PIL import Image
|
|
from dotenv import load_dotenv
|
|
from playwright.sync_api import sync_playwright
|
|
from simple_term_menu import TerminalMenu
|
|
|
|
load_dotenv()
|
|
|
|
# ── Config ─────────────────────────────────────────────────

BASE_URL = "https://m.happymh.com"          # scrape target site
ROOT_DIR = Path(__file__).parent
CONTENT_DIR = ROOT_DIR / "manga-content"    # local download root
MANGA_JSON = ROOT_DIR / "manga.json"        # JSON list of manga URLs to process
BROWSER_DATA = ROOT_DIR / ".browser-data"   # persistent Chrome profile (keeps CF cookies)
CDP_PORT = 9333                             # Chrome DevTools Protocol port
REQUEST_DELAY = 1.5                         # polite delay between chapters (seconds)
UPLOAD_WORKERS = 8                          # parallel R2 upload threads
CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"

# R2
# S3-compatible client pointed at the Cloudflare R2 endpoint.
s3 = boto3.client(
    "s3",
    endpoint_url=f"https://{os.environ['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com",
    aws_access_key_id=os.environ["R2_ACCESS_KEY"],
    aws_secret_access_key=os.environ["R2_SECRET_KEY"],
    region_name="auto",
)
BUCKET = os.environ["R2_BUCKET"]
PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/")  # normalized: no trailing slash
DATABASE_URL = os.environ["DATABASE_URL"]
|
|
|
|
|
|
# ── Chrome management ──────────────────────────────────────
|
|
|
|
|
|
def hide_chrome():
    """Hide the Chrome window on macOS via System Events (best effort)."""
    script = 'tell application "System Events" to set visible of process "Google Chrome" to false'
    try:
        subprocess.Popen(
            ["osascript", "-e", script],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        # osascript missing (non-macOS) or spawn failure — cosmetic only, ignore.
        pass
|
|
|
|
|
|
def is_port_open(port):
    """Return True when something is accepting TCP connections on localhost:port."""
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        # connect_ex returns 0 on success instead of raising.
        return sock.connect_ex(("localhost", port)) == 0
    finally:
        sock.close()
|
|
|
|
|
|
def launch_chrome(start_url=None):
    """Start Chrome with a CDP debugging port and the persistent profile.

    Returns the Popen handle, or None when an instance already listens on
    CDP_PORT (reused, nothing to terminate), Chrome is missing from
    CHROME_PATH, or the port never opens within ~15 seconds.
    """
    if is_port_open(CDP_PORT):
        return None
    if not Path(CHROME_PATH).exists():
        print(f" Chrome not found at: {CHROME_PATH}")
        return None
    cmd = [
        CHROME_PATH,
        f"--remote-debugging-port={CDP_PORT}",
        f"--user-data-dir={BROWSER_DATA}",  # persistent profile keeps CF clearance cookies
        "--no-first-run",
        "--no-default-browser-check",
        "--window-position=0,0",
        "--window-size=800,600",
        "--no-focus-on-navigate",
    ]
    if start_url:
        cmd.append(start_url)
    proc = subprocess.Popen(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    # Poll up to ~15s (30 * 0.5s) for the CDP port to come up.
    for _ in range(30):
        if is_port_open(CDP_PORT):
            time.sleep(1)  # brief grace period before hiding the window
            hide_chrome()
            return proc
        time.sleep(0.5)
    print(" Chrome failed to start")
    return None
|
|
|
|
|
|
class BrowserSession:
    """Manages Chrome + CDP lifecycle.

    start() launches (or reuses) Chrome and attaches Playwright over CDP;
    close() tears everything down in reverse order.
    """

    def __init__(self):
        self.chrome_proc = None  # Popen handle only if we launched Chrome ourselves
        self.playwright = None
        self.browser = None
        self.page = None

    def start(self):
        """Connect Playwright to Chrome over CDP, reusing the first open tab."""
        self.chrome_proc = launch_chrome()  # None if an instance was already running
        self.playwright = sync_playwright().start()
        self.browser = self.playwright.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
        context = self.browser.contexts[0]
        # Reuse an existing tab when present so the CF-cleared session carries over.
        self.page = context.pages[0] if context.pages else context.new_page()

    def close(self):
        """Disconnect the browser, stop Chrome (if ours), and stop Playwright."""
        try:
            self.browser.close()
        except Exception:
            pass  # browser may be None or already disconnected
        if self.chrome_proc:
            self.chrome_proc.terminate()
        if self.playwright:
            self.playwright.stop()
|
|
|
|
|
|
def with_browser(func):
    """Run func(session) inside a Chrome+CDP session. Returns func's result."""
    sess = BrowserSession()
    sess.start()
    try:
        result = func(sess)
    finally:
        # Always tear the session down, even when func raises.
        sess.close()
    return result
|
|
|
|
|
|
# ── Cloudflare ─────────────────────────────────────────────
|
|
|
|
|
|
def wait_for_cloudflare(session, timeout=120):
    """Wait for CF to resolve. User solves in the visible browser window.

    Polls the page title about once per second for up to `timeout` seconds.
    Returns True when the site title ("嗨皮漫画") or a happymh URL is seen,
    False on timeout.
    """
    page = session.page
    for i in range(timeout):
        try:
            title = page.title()
        except Exception:
            # Page may be mid-navigation or detached during the challenge — retry.
            time.sleep(1)
            continue
        if "Just a moment" in title or "challenge" in page.url:
            # Still on the Cloudflare interstitial; nudge the user periodically.
            if i == 0:
                print(" CF challenge — solve in browser...")
            elif i % 15 == 0:
                print(f" Still waiting for CF... ({i}s)")
            time.sleep(1)
            continue
        if title and ("嗨皮漫画" in title or "happymh" in page.url):
            return True
        time.sleep(1)
    print(" CF timed out.")
    return False
|
|
|
|
|
|
# ── Happymh: chapter fetching ─────────────────────────────
|
|
|
|
|
|
def fetch_chapters_via_api(page, slug):
    """Fetch the full chapter list via the site's paginated JSON API.

    Runs fetch() inside the page context so CF cookies apply.  Pages through
    /v2.0/apis/manga/chapterByPage (cap: 30 pages, 10s abort per request)
    until `total` chapters are collected.  Returns a list of
    {id, chapterName} dicts, or None on API error / empty result.
    """
    result = page.evaluate("""
        async (slug) => {
            const all = [];
            let total = 0;
            for (let p = 1; p <= 30; p++) {
                const url = `/v2.0/apis/manga/chapterByPage?code=${slug}&lang=cn&order=asc&page=${p}&_t=${Date.now()}`;
                try {
                    const ctrl = new AbortController();
                    setTimeout(() => ctrl.abort(), 10000);
                    const r = await fetch(url, { signal: ctrl.signal });
                    if (!r.ok) { if (p === 1) return { error: r.status }; break; }
                    const json = await r.json();
                    if (!json.data) break;
                    total = json.data.total || total;
                    let items = null;
                    for (const val of Object.values(json.data)) {
                        if (Array.isArray(val) && val.length > 0) { items = val; break; }
                    }
                    if (!items || items.length === 0) break;
                    for (const ch of items) {
                        all.push({ id: String(ch.id || ''), chapterName: ch.chapterName || ch.name || '' });
                    }
                    if (total && all.length >= total) break;
                } catch (e) {
                    if (p === 1) return { error: e.message };
                    break;
                }
            }
            return { chapters: all, total };
        }
    """, slug)
    if result and result.get("chapters") and len(result["chapters"]) > 0:
        chapters = result["chapters"]
        total = result.get("total", len(chapters))
        print(f" API: {len(chapters)}/{total} chapters")
        return chapters
    if result and result.get("error"):
        print(f" API error: {result['error']}")
    # None signals the caller to fall back to DOM scraping.
    return None
|
|
|
|
|
|
def fetch_chapters_from_dom(page):
    """Scrape the chapter list from the manga page DOM (fallback path).

    Expands the chapter drawer, flips sort order, clicks "load more" until
    the advertised total is reached, then collects /mangaread/ links.
    Returns a list of {id, chapterName} dicts or None on failure.
    """
    # Bail out early if no chapter links ever appear.
    try:
        page.wait_for_selector("a[href*='/mangaread/']", timeout=15000)
        page.wait_for_timeout(1000)
    except Exception:
        return None

    # Click whichever "expand all" style button is present (first visible wins).
    for selector in ["text=展开全部", "text=查看全部", "text=全部章节", "text=展开更多", "text=更多"]:
        try:
            btn = page.query_selector(selector)
            if btn and btn.is_visible():
                btn.click()
                page.wait_for_timeout(2000)
                break
        except Exception:
            continue

    # The chapter list lives in a Material-UI drawer; wait briefly for it.
    try:
        page.wait_for_selector(".MuiDrawer-paper", timeout=5000)
    except Exception:
        pass

    # Flip sort order ("点我改变排序") so chapters come out ascending.
    try:
        sort_btn = page.query_selector("text=点我改变排序")
        if sort_btn and sort_btn.is_visible():
            sort_btn.click()
            page.wait_for_timeout(2000)
    except Exception:
        pass

    # Read the advertised chapter count ("共N个章节") from the drawer, 0 if absent.
    total = page.evaluate("""
        () => {
            const spans = document.querySelectorAll('.MuiDrawer-paper span');
            for (const s of spans) {
                const m = s.textContent.match(/共(\\d+)个章节/);
                if (m) return parseInt(m[1]);
            }
            return 0;
        }
    """)

    # Keep clicking "加载更多" (load more) until we have `total` links,
    # the button disappears, or 50 attempts elapse.
    for _ in range(50):
        count = page.evaluate("document.querySelectorAll('.MuiDrawer-paper a[href*=\"/mangaread/\"]').length")
        if total and count >= total:
            break
        clicked = page.evaluate("""
            () => {
                const walker = document.createTreeWalker(
                    document.querySelector('.MuiDrawer-paper') || document.body, NodeFilter.SHOW_TEXT
                );
                while (walker.nextNode()) {
                    if (walker.currentNode.textContent.includes('加载更多')) {
                        let el = walker.currentNode.parentElement;
                        while (el && el.tagName !== 'LI') el = el.parentElement;
                        if (el) { el.click(); return true; }
                        walker.currentNode.parentElement.click();
                        return true;
                    }
                }
                return false;
            }
        """)
        if not clicked:
            break
        page.wait_for_timeout(1000)

    # Collect unique chapter links; the numeric path segment is the chapter id.
    # "开始阅读" ("start reading") links are navigation, not chapters.
    chapters = page.evaluate("""
        () => {
            const container = document.querySelector('.MuiDrawer-paper') || document;
            const links = container.querySelectorAll('a[href*="/mangaread/"]');
            const chapters = [], seen = new Set();
            links.forEach(a => {
                const match = a.getAttribute('href').match(/\\/mangaread\\/[^/]+\\/(\\d+)/);
                if (match && !seen.has(match[1])) {
                    seen.add(match[1]);
                    const name = a.textContent.trim();
                    if (name && name !== '开始阅读') chapters.push({ id: match[1], chapterName: name });
                }
            });
            return chapters;
        }
    """)

    # Close the drawer so it does not obscure later interactions.
    try:
        page.keyboard.press("Escape")
    except Exception:
        pass
    return chapters if chapters else None
|
|
|
|
|
|
# ── Happymh: metadata & cover ─────────────────────────────
|
|
|
|
|
|
def fetch_metadata(page):
    """Scrape title/author/genres/description/cover URL from the manga page.

    Parses the raw HTML with regexes (the page markup, not an API).  Returns
    a dict keyed "mg-url", "mg-title", "mg-author", "mg-genres",
    "mg-description", "mg-cover"; fields that fail to parse are omitted.
    """
    html_text = page.content()
    metadata = {"mg-url": page.url}
    m = re.search(r'<h2 class="mg-title">(.*?)</h2>', html_text)
    if m:
        metadata["mg-title"] = m.group(1).strip()
    m = re.search(r'<p class="mg-sub-title">.*?<a[^>]*>(.*?)</a>', html_text, re.DOTALL)
    if m:
        metadata["mg-author"] = m.group(1).strip()
    genre_matches = re.findall(r'<p class="mg-cate">.*?</p>', html_text, re.DOTALL)
    if genre_matches:
        # Genre names are the link texts inside the first mg-cate paragraph.
        metadata["mg-genres"] = re.findall(r'<a[^>]*>(.*?)</a>', genre_matches[0])
    m = re.search(r'<div class="mg-desc">.*?<p[^>]*>(.*?)</p>', html_text, re.DOTALL)
    if m:
        metadata["mg-description"] = m.group(1).strip()
    if not metadata.get("mg-description"):
        # Fallback: some pages render the description inside <mip-showmore>.
        m = re.search(r'<mip-showmore[^>]*>(.*?)</mip-showmore>', html_text, re.DOTALL)
        if m:
            desc = re.sub(r'<[^>]+>', '', m.group(1)).strip()  # strip inner tags
            if desc:
                metadata["mg-description"] = desc
    # Cover URL: prefer og:image, else known cover <img> selectors.
    cover_url = page.evaluate("""
        () => {
            const og = document.querySelector('meta[property="og:image"]');
            if (og) return og.content;
            for (const sel of ['img.mg-cover', 'img[src*="mcover"]']) {
                const img = document.querySelector(sel);
                if (img && img.src) return img.src;
            }
            return null;
        }
    """)
    if cover_url:
        metadata["mg-cover"] = cover_url
    return metadata
|
|
|
|
|
|
# ── Happymh: image download ───────────────────────────────
|
|
|
|
|
|
def _try_get_chapter_images(session, slug, chapter_id):
    """Single attempt to get chapter images. Returns (images, api_status).

    Installs a response listener, navigates to the reader page, and captures
    the /apis/manga/reading JSON for exactly this chapter (the site also
    prefetches the next chapter, which must be ignored).  Falls back to
    scraping <img> tags from the DOM when interception yields nothing.
    Each image is {"url": str, "no_referrer": bool}.
    """
    captured_images = []
    # Mutable dict so the listener closure can report back to this scope.
    api_info = {"found": False, "status": None, "error": None}

    def on_response(response):
        if "/apis/manga/reading" not in response.url:
            return
        # Only capture our chapter, skip prefetched ones
        if f"cid={chapter_id}" not in response.url and f"cid%3D{chapter_id}" not in response.url:
            return
        # Ignore if we already captured images (prevent duplicate/prefetch)
        if captured_images:
            return
        api_info["found"] = True
        api_info["status"] = response.status
        if response.status != 200:
            api_info["error"] = f"status {response.status}"
            return
        try:
            data = response.json()
            # Verify chapter ID in response body
            resp_cid = str(data.get("data", {}).get("id", ""))
            if resp_cid and resp_cid != str(chapter_id):
                return
            scans = data.get("data", {}).get("scans", [])
            if isinstance(scans, str):
                # Some responses deliver scans as a JSON-encoded string.
                scans = json.loads(scans)
            for scan in scans:
                if isinstance(scan, dict) and "url" in scan:
                    captured_images.append({
                        "url": scan["url"],
                        # r != 0 marks hosts that reject referred requests.
                        "no_referrer": scan.get("r", 0) != 0,
                    })
        except Exception as e:
            api_info["error"] = str(e)

    page = session.page
    page.on("response", on_response)
    reader_url = f"{BASE_URL}/mangaread/{slug}/{chapter_id}"
    print(" Loading reader...")
    try:
        # In-page navigation (vs page.goto) keeps the listener attached.
        page.evaluate(f"window.location.href = '{reader_url}'")
    except Exception:
        pass
    hide_chrome()

    time.sleep(2)
    try:
        # Neutralize window.close so the site cannot close our tab.
        page.evaluate("window.close = () => {}")
    except Exception:
        pass

    print(" Waiting for page...")
    if not wait_for_cloudflare(session, timeout=90):
        page = session.page
        try:
            page.remove_listener("response", on_response)
        except Exception:
            pass
        return [], api_info

    page = session.page
    print(" Waiting for API...")
    # Poll up to 20s for the listener to fill captured_images.
    deadline = time.time() + 20
    while time.time() < deadline:
        if captured_images:
            break
        try:
            page.wait_for_timeout(500)
        except Exception:
            break

    try:
        page.remove_listener("response", on_response)
    except Exception:
        pass

    if not api_info["found"]:
        print(" API not intercepted")
    elif api_info["error"]:
        print(f" API: {api_info['error']}")

    # Filter out next-chapter preview images by counting DOM containers
    if captured_images:
        try:
            counts = page.evaluate("""
                () => {
                    const all = document.querySelectorAll('[class*="imgContainer"]').length;
                    const next = document.querySelectorAll('[class*="imgNext"]').length;
                    return { all, next, current: all - next };
                }
            """)
            if counts and counts.get("next", 0) > 0:
                actual = counts["current"]
                # Only trim when the container count is plausible.
                if 0 < actual < len(captured_images):
                    captured_images = captured_images[:actual]
        except Exception:
            pass

    # DOM fallback
    if not captured_images:
        try:
            page.wait_for_timeout(3000)
            # Collect page <img> sources, excluding next-chapter prefetch,
            # covers, Cloudflare assets and SVG icons.
            dom_images = page.evaluate("""
                () => {
                    const imgs = document.querySelectorAll('img[src*="http"]');
                    const nextImgs = new Set(
                        Array.from(document.querySelectorAll('[class*="imgNext"] img'))
                            .map(img => img.src)
                    );
                    const urls = [], seen = new Set();
                    imgs.forEach(img => {
                        const src = img.src || '';
                        if (src && !seen.has(src) && !nextImgs.has(src)
                            && !src.includes('/mcover/')
                            && !src.includes('cloudflare') && !src.includes('.svg')) {
                            seen.add(src); urls.push(src);
                        }
                    });
                    return urls;
                }
            """)
            if dom_images:
                print(f" DOM: {len(dom_images)} images")
                for u in dom_images:
                    captured_images.append({"url": u, "no_referrer": False})
        except Exception as e:
            print(f" DOM failed: {e}")

    return captured_images, api_info
|
|
|
|
|
|
def get_chapter_images(session, slug, chapter_id):
    """Get chapter images. On API 403 (CF expired), navigate to solve and retry.

    Wraps _try_get_chapter_images with a single retry: a 403 from the reading
    API means the Cloudflare clearance lapsed, so re-navigate, let the user
    solve the challenge, and attempt once more.  Returns the image list
    (possibly empty).
    """
    images, api_info = _try_get_chapter_images(session, slug, chapter_id)
    if images:
        return images

    if api_info.get("status") == 403:
        print(" CF expired — solve in browser...")
        page = session.page
        try:
            # wait_until="commit" returns as soon as navigation starts;
            # wait_for_cloudflare handles the rest.
            page.goto(f"{BASE_URL}/mangaread/{slug}/{chapter_id}", wait_until="commit", timeout=60000)
        except Exception:
            pass
        if wait_for_cloudflare(session, timeout=120):
            images, _ = _try_get_chapter_images(session, slug, chapter_id)

    return images
|
|
|
|
|
|
def fetch_image_bytes(session, img):
    """Fetch image via browser network stack, return raw bytes or None.

    Triggers an in-page fetch() (so CF cookies and browser headers apply)
    and reads the matching network response.  Bodies of 100 bytes or fewer
    are treated as failures (likely error pages).
    """
    page = session.page
    url = img["url"]
    # Hosts flagged no_referrer reject referred requests — strip the referrer.
    ref_policy = "no-referrer" if img.get("no_referrer") else "origin"
    try:
        with page.expect_response(lambda r: url in r.url, timeout=15000) as resp_info:
            page.evaluate("([u, r]) => fetch(u, { referrerPolicy: r })", [url, ref_policy])
        response = resp_info.value
        if response.status == 200:
            body = response.body()
            if body and len(body) > 100:
                return body
    except Exception as e:
        # Log only the first error ever (function attribute as a one-shot flag)
        # to avoid flooding the per-page progress output.
        if not hasattr(fetch_image_bytes, "_err_logged"):
            fetch_image_bytes._err_logged = True
            print(f"\n First error: {e}")
    return None
|
|
|
|
|
|
def download_image(session, img, save_path):
    """Fetch one image and write it to save_path; True on success or cache hit."""
    if save_path.exists():
        # Already downloaded on a previous run — nothing to do.
        return True
    data = fetch_image_bytes(session, img)
    if not data:
        return False
    save_path.parent.mkdir(parents=True, exist_ok=True)
    save_path.write_bytes(data)
    return True
|
|
|
|
|
|
# ── R2 / Upload ────────────────────────────────────────────
|
|
|
|
|
|
WEBP_QUALITY = 75


def _to_webp_bytes(img, quality=WEBP_QUALITY, method=6):
    """Encode a PIL image as WebP (method=6 = slowest/best compression)."""
    out = io.BytesIO()
    img.save(out, format="WEBP", quality=quality, method=method)
    return out.getvalue()
|
|
|
|
|
|
def convert_to_webp(source, quality=WEBP_QUALITY):
    """Open an image (path or file-like) and return WebP-encoded bytes."""
    image = Image.open(source)
    return _to_webp_bytes(image, quality)
|
|
|
|
|
|
def make_cover(source, width=400, height=560):
    """Crop and resize an image into a width x height WebP cover (quality 80).

    Images wider than the target ratio are center-cropped horizontally;
    taller ones keep the top portion and drop the bottom.
    """
    img = Image.open(source)
    target_ratio = width / height
    if img.width / img.height > target_ratio:
        # Too wide: trim equal slices off both sides.
        crop_w = int(img.height * target_ratio)
        x0 = (img.width - crop_w) // 2
        img = img.crop((x0, 0, x0 + crop_w, img.height))
    else:
        # Too tall: keep the top, cut the bottom.
        crop_h = int(img.width / target_ratio)
        img = img.crop((0, 0, img.width, crop_h))
    resized = img.resize((width, height), Image.LANCZOS)
    return _to_webp_bytes(resized, quality=80)
|
|
|
|
|
|
def upload_to_r2(key, data, content_type="image/webp"):
    """Upload bytes to the R2 bucket under `key`; return the public URL."""
    s3.put_object(Bucket=BUCKET, Key=key, Body=data, ContentType=content_type)
    return f"{PUBLIC_URL}/{key}"
|
|
|
|
|
|
def r2_key_exists(key):
    """Return True when `key` already exists in the bucket (HEAD request)."""
    try:
        s3.head_object(Bucket=BUCKET, Key=key)
    except s3.exceptions.ClientError:
        # head_object raises ClientError (404) for missing keys.
        return False
    return True
|
|
|
|
|
|
def get_db():
    """Open a Postgres connection with UTF-8 client encoding."""
    connection = psycopg2.connect(DATABASE_URL)
    connection.set_client_encoding("UTF8")
    return connection
|
|
|
|
|
|
def parse_chapter_dir(dir_name):
    """Split a '<number> <title>' directory name into (number, title).

    Returns (0, dir_name) when the name does not start with a number —
    callers use 0 to mean "not a chapter directory".
    """
    parsed = re.match(r"^(\d+)\s+(.+)$", dir_name)
    if parsed is None:
        return 0, dir_name
    return int(parsed.group(1)), parsed.group(2)
|
|
|
|
|
|
# ── Helpers ────────────────────────────────────────────────
|
|
|
|
|
|
def load_manga_urls():
    """Read the manga URL list from manga.json; [] when missing or not a list."""
    if not MANGA_JSON.exists():
        return []
    parsed = json.loads(MANGA_JSON.read_text(encoding="utf-8"))
    if isinstance(parsed, list):
        return parsed
    return []
|
|
|
|
|
|
def slug_from_url(url):
    """Return the last path segment of a URL (the manga slug)."""
    path = urlparse(url).path
    return path.strip("/").split("/")[-1]
|
|
|
|
|
|
def get_existing_chapters(manga_dir):
    """Return names of subdirectories that hold at least one .jpg page."""
    if not manga_dir.exists():
        return set()
    return {
        entry.name
        for entry in manga_dir.iterdir()
        if entry.is_dir() and any(entry.glob("*.jpg"))
    }
|
|
|
|
|
|
def list_local_manga():
    """Sorted names of non-hidden directories under manga-content/."""
    if not CONTENT_DIR.exists():
        return []
    names = [
        entry.name
        for entry in CONTENT_DIR.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    ]
    return sorted(names)
|
|
|
|
|
|
# ── Core: download manga ──────────────────────────────────
|
|
|
|
|
|
def load_manga_page(session, slug):
    """Navigate to manga page, pass CF, return (chapters, metadata, cover_bytes) or None.

    The cover image is captured passively from the page's own network traffic
    (any 200 response whose URL contains /mcover/) so no extra fetch is needed.
    Returns None only when the Cloudflare challenge never resolves.
    """
    cover_responses = {}  # url -> raw body, filled by the listener below

    def on_cover(response):
        if "/mcover/" in response.url and response.status == 200:
            try:
                cover_responses[response.url] = response.body()
            except Exception:
                pass  # body may be unavailable if the response was evicted

    page = session.page
    page.on("response", on_cover)

    print(" Loading manga page...")
    try:
        page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
    except Exception:
        pass
    hide_chrome()
    if not wait_for_cloudflare(session):
        page = session.page
        try:
            page.remove_listener("response", on_cover)
        except Exception:
            pass
        return None

    page = session.page  # may have changed after CF restart
    print(" Fetching chapters...")
    chapters = fetch_chapters_via_api(page, slug)
    if not chapters:
        print(" API failed, trying DOM...")
        chapters = fetch_chapters_from_dom(page)

    metadata = fetch_metadata(page)

    # Wait for cover image to be present in DOM (up to 8s)
    cover_url = None
    for _ in range(16):
        cover_url = page.evaluate("""
            () => {
                const sels = ['img.mip-fill-content[src*="mcover"]', 'img[src*="/mcover/"]'];
                for (const s of sels) {
                    const img = document.querySelector(s);
                    if (img && img.src) return img.src;
                }
                return null;
            }
        """)
        if cover_url:
            break
        page.wait_for_timeout(500)

    # Give the response another moment to be captured
    if cover_url and cover_url not in cover_responses:
        page.wait_for_timeout(1500)

    try:
        page.remove_listener("response", on_cover)
    except Exception:
        pass

    cover_body = None
    if cover_url:
        cover_body = cover_responses.get(cover_url)
        if not cover_body:
            # Fuzzy match: compare URLs with query strings stripped in either direction.
            for url, data in cover_responses.items():
                if cover_url.split("?")[0] in url or url.split("?")[0] in cover_url:
                    cover_body = data
                    break

    if not cover_body:
        if cover_url:
            print(f" Cover URL found but body not captured ({len(cover_responses)} responses)")
        else:
            print(f" No cover URL found in DOM")

    return chapters, metadata, cover_body
|
|
|
|
|
|
def save_manga_local(slug, metadata, cover_body):
    """Persist metadata (merged into detail.json) and the cover image locally."""
    manga_dir = CONTENT_DIR / slug
    manga_dir.mkdir(parents=True, exist_ok=True)

    detail_path = manga_dir / "detail.json"
    if metadata:
        # Merge new metadata over whatever detail.json already holds.
        merged = {}
        if detail_path.exists():
            try:
                merged = json.loads(detail_path.read_text(encoding="utf-8"))
            except json.JSONDecodeError:
                pass
        merged.update(metadata)
        detail_path.write_text(json.dumps(merged, ensure_ascii=False, indent=4), encoding="utf-8")

    cover_path = manga_dir / "cover.jpg"
    # Never overwrite an existing cover; skip bodies too small to be real images.
    if not cover_path.exists() and cover_body and len(cover_body) > 100:
        cover_path.write_bytes(cover_body)
        print(f" Cover saved ({len(cover_body)} bytes)")
|
|
|
|
|
|
def download_chapter(session, slug, chapter_index, chapter, manga_dir):
    """Download a single chapter's images. Returns True if successful.

    Saves pages as "<chapter_index> <chapterName>/<n>.jpg".  Failed pages get
    one retry pass; a chapter where nothing downloaded has its (empty)
    directory removed so it is not mistaken for a finished chapter.
    """
    ch_id = chapter["id"]
    ch_name = chapter["chapterName"]
    folder_name = f"{chapter_index} {ch_name}"
    chapter_dir = manga_dir / folder_name

    images = get_chapter_images(session, slug, ch_id)
    if not images:
        print(f" No images")
        return False

    print(f" {len(images)} pages")
    chapter_dir.mkdir(parents=True, exist_ok=True)

    ok = 0
    failed = []  # (page_number, image) pairs for the retry pass
    for pn, img in enumerate(images, 1):
        save_path = chapter_dir / f"{pn}.jpg"
        if download_image(session, img, save_path):
            ok += 1
            print(f" {pn}/{len(images)}", end="\r")
        else:
            failed.append((pn, img))
        time.sleep(0.1)  # small throttle between page fetches

    # One retry pass over the failures, more slowly.
    if failed:
        time.sleep(1)
        for pn, img in failed:
            save_path = chapter_dir / f"{pn}.jpg"
            if download_image(session, img, save_path):
                ok += 1
            else:
                print(f" {pn}/{len(images)} FAIL")
            time.sleep(0.3)

    print(f" {ok}/{len(images)} downloaded" + " " * 20)

    if ok == 0:
        # Remove the empty directory so it doesn't count as "existing" later.
        try:
            chapter_dir.rmdir()
        except Exception:
            pass
        return False

    time.sleep(REQUEST_DELAY)
    return True
|
|
|
|
|
|
# ── Core: upload manga ────────────────────────────────────
|
|
|
|
|
|
def upload_manga_to_r2(manga_name, conn):
    """Upload a local manga to R2 and create DB records.

    Reads manga-content/<manga_name>/: detail.json for metadata, cover.jpg,
    and one "<number> <title>" directory per chapter.  Pages are converted to
    WebP and uploaded in parallel; Chapter/Page rows are inserted only AFTER
    the R2 upload succeeds, so the DB never references missing objects.
    """
    manga_path = CONTENT_DIR / manga_name
    detail_path = manga_path / "detail.json"

    if not detail_path.exists():
        print(f" Skipping {manga_name}: no detail.json")
        return

    detail = json.loads(detail_path.read_text(encoding="utf-8"))
    title = detail.get("mg-title", manga_name)
    slug = manga_name  # local directory name doubles as the slug
    genres = detail.get("mg-genres", [])
    description = detail.get("mg-description", "")
    if not description and genres:
        description = f"Genres: {', '.join(genres)}"
    genre = genres[0] if genres else "Drama"  # DB wants a single primary genre

    cur = conn.cursor()

    # Cover
    cover_file = manga_path / "cover.jpg"
    cover_url = ""
    cover_key = f"manga/{slug}/cover.webp"
    if cover_file.exists():
        if not r2_key_exists(cover_key):
            cover_url = upload_to_r2(cover_key, make_cover(cover_file))
            print(f" Cover uploaded")
        else:
            cover_url = f"{PUBLIC_URL}/{cover_key}"

    # Manga record: update cover on an existing row, else insert a new one.
    cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
    row = cur.fetchone()
    if row:
        manga_id, existing_cover = row
        if cover_url and cover_url != existing_cover:
            cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id))
            conn.commit()
    else:
        cur.execute(
            'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
            "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
            (title, description, cover_url, slug, genre),
        )
        manga_id = cur.fetchone()[0]
        conn.commit()
        print(f" Created manga (id: {manga_id})")

    # Chapters, sorted by their numeric prefix.
    chapter_dirs = sorted(
        [d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")],
        key=lambda d: parse_chapter_dir(d.name)[0],
    )

    for chapter_dir in chapter_dirs:
        order_num, chapter_title = parse_chapter_dir(chapter_dir.name)
        if order_num == 0:
            # Not a "<number> <title>" directory — not a chapter.
            continue

        # Idempotency: skip chapters already recorded in the DB.
        cur.execute('SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s', (manga_id, order_num))
        if cur.fetchone():
            print(f" [{order_num}] {chapter_title} — skip")
            continue

        # Page files sorted by the first number in their stem (1.jpg, 2.jpg, ...).
        page_files = sorted(
            [f for f in chapter_dir.iterdir() if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")],
            key=lambda f: int(re.search(r"(\d+)", f.stem).group(1)) if re.search(r"(\d+)", f.stem) else 0,
        )
        if not page_files:
            continue

        print(f" [{order_num}] {chapter_title} ({len(page_files)} pages)")

        # Upload to R2 first
        # (defaults bind slug/order_num now, avoiding late-binding surprises)
        def process_page(args, _slug=slug, _order=order_num):
            j, pf = args
            r2_key = f"manga/{_slug}/chapters/{_order}/{j}.webp"
            if not r2_key_exists(r2_key):
                return j, upload_to_r2(r2_key, convert_to_webp(pf))
            return j, f"{PUBLIC_URL}/{r2_key}"

        page_urls = {}
        done = 0
        with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
            futures = {pool.submit(process_page, (j, f)): j for j, f in enumerate(page_files, 1)}
            for future in as_completed(futures):
                j, url = future.result()
                page_urls[j] = url
                done += 1
                print(f" {done}/{len(page_files)}", end="\r")

        if not page_urls:
            print(f" Upload failed, skip")
            continue

        # DB records only after R2 upload succeeds
        cur.execute(
            'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
            (manga_id, order_num, chapter_title),
        )
        chapter_id = cur.fetchone()[0]
        for j in sorted(page_urls):
            cur.execute('INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)', (chapter_id, j, page_urls[j]))
        conn.commit()
        print(f" {len(page_files)} pages uploaded" + " " * 10)
|
|
|
|
|
|
# ── Commands ───────────────────────────────────────────────
|
|
|
|
|
|
def cmd_setup():
    """Interactive one-time setup: open Chrome visibly so the user can solve
    the Cloudflare challenge, then verify the cf_clearance cookie exists.

    Unlike other commands, Chrome stays visible here (setup mode).
    """
    print("\n Chrome will open. Solve Cloudflare on:")
    print(" 1. m.happymh.com")
    print(" 2. Any manga page")
    print(" 3. Any reader page\n")

    chrome_proc = launch_chrome(BASE_URL)
    # launch_chrome returns None both on failure and when Chrome already runs;
    # the open port disambiguates the two cases.
    if not chrome_proc and not is_port_open(CDP_PORT):
        print(" Failed to launch Chrome")
        return

    input(" Press ENTER when done... ")

    # Connect briefly over CDP just to check for the cf_clearance cookie.
    try:
        with sync_playwright() as p:
            browser = p.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
            cookies = browser.contexts[0].cookies()
            cf = [c for c in cookies if c["name"] == "cf_clearance"]
            print(f" cf_clearance: {'found' if cf else 'NOT found'}")
            browser.close()
    except Exception as e:
        print(f" Could not verify: {e}")

    # Only terminate Chrome if this command started it.
    if chrome_proc:
        chrome_proc.terminate()
    print()
|
|
|
|
|
|
def cmd_download(manga_url=None, chapter_set=None):
    """Download manga. chapter_set is a set of 1-based indices, or None for all.

    With no manga_url, processes every URL from manga.json.  Chapters whose
    name already appears in a downloaded directory are skipped.  Errors on
    one manga are logged and do not abort the rest.
    """
    urls = [manga_url] if manga_url else load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return

    print(f"\n Downloading {len(urls)} manga(s)...\n")

    def run(session):
        for url in urls:
            slug = slug_from_url(url)
            try:
                result = load_manga_page(session, slug)
                if not result:
                    continue  # CF never resolved for this manga
                chapters, metadata, cover_body = result
                if not chapters:
                    print(" No chapters found.")
                    continue
                print(f" Found {len(chapters)} chapters")
                save_manga_local(slug, metadata, cover_body)

                existing = get_existing_chapters(CONTENT_DIR / slug)

                for i, ch in enumerate(chapters, 1):
                    # Honor an explicit chapter selection when given.
                    if chapter_set and i not in chapter_set:
                        continue
                    # Skip chapters already downloaded (matched by name substring,
                    # since local folders are "<index> <name>").
                    if any(ch["chapterName"] in name for name in existing):
                        print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip")
                        continue
                    print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})")
                    download_chapter(session, slug, i, ch, CONTENT_DIR / slug)
                print(f"\n Done: {slug}")
            except Exception as e:
                # Keep going with the next manga; surface the traceback for debugging.
                print(f"\n Error: {url}: {e}")
                import traceback
                traceback.print_exc()

    with_browser(run)
    print("\nDownload complete!")
|
|
|
|
|
|
def cmd_upload(manga_name=None):
    """Upload one named manga — or everything under manga-content/ — to R2+DB."""
    if manga_name:
        names = [manga_name]
    else:
        names = list_local_manga()
        if not names:
            print(" No manga in manga-content/")
            return

    print(f"\n Uploading {len(names)} manga(s)...")
    conn = get_db()
    try:
        banner = "=" * 50
        for name in names:
            print(f"\n {banner}")
            print(f" {name}")
            print(f" {banner}")
            upload_manga_to_r2(name, conn)
    finally:
        # Always release the DB connection, even on a mid-upload error.
        conn.close()
    print("\nUpload complete!")
|
|
|
|
|
|
def cmd_sync(manga_url=None):
    """Sync: fetch latest chapters, stream directly to R2 (no local save).

    For each manga URL: load the site page, ensure a Manga row exists,
    diff the site's chapter list against the DB by chapter number, and for
    each missing chapter fetch its pages into RAM, convert to WebP, upload
    to R2 in parallel, and only then insert the Chapter/Page rows.
    """
    urls = [manga_url] if manga_url else load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return

    conn = get_db()

    def run(session):
        for url in urls:
            slug = slug_from_url(url)

            print(f"\n{'='*60}")
            print(f"Syncing: {slug}")
            print(f"{'='*60}")

            # 1. Load manga page + get chapters
            result = load_manga_page(session, slug)
            if not result:
                continue  # CF never resolved
            chapters, metadata, cover_body = result
            if not chapters:
                print(" No chapters found.")
                continue
            print(f" {len(chapters)} chapters on site")

            # 2. Ensure manga in DB
            cur = conn.cursor()
            title = metadata.get("mg-title", slug)
            genres = metadata.get("mg-genres", [])
            description = metadata.get("mg-description", "")
            genre = genres[0] if genres else "Drama"

            # Cover → R2 (from RAM)
            cover_url = ""
            cover_key = f"manga/{slug}/cover.webp"
            if cover_body and len(cover_body) > 100:
                if not r2_key_exists(cover_key):
                    cover_webp = make_cover(io.BytesIO(cover_body))
                    cover_url = upload_to_r2(cover_key, cover_webp)
                    print(f" Cover uploaded to R2")
                else:
                    cover_url = f"{PUBLIC_URL}/{cover_key}"

            cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (slug,))
            row = cur.fetchone()
            if row:
                manga_id = row[0]
                if cover_url:
                    cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id))
                    conn.commit()
            else:
                cur.execute(
                    'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
                    "VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW()) RETURNING id",
                    (title, description, cover_url, slug, genre),
                )
                manga_id = cur.fetchone()[0]
                conn.commit()
                print(f" Created manga in DB (id: {manga_id})")

            # 3. Find chapters missing from DB
            # (chapter "number" in the DB is the 1-based site index)
            cur.execute('SELECT number FROM "Chapter" WHERE "mangaId" = %s', (manga_id,))
            existing_numbers = {row[0] for row in cur.fetchall()}

            new_count = 0
            for i, ch in enumerate(chapters, 1):
                ch_name = ch["chapterName"]
                if i in existing_numbers:
                    continue

                new_count += 1
                print(f" [{i}/{len(chapters)}] {ch_name} (id={ch['id']})")

                # Get image URLs from reader page
                images = get_chapter_images(session, slug, ch["id"])
                if not images:
                    print(f" No images")
                    continue

                print(f" {len(images)} pages")

                # Fetch each image into RAM, convert to WebP, upload to R2
                page_bytes = {}  # page_num -> raw bytes
                ok = 0
                for pn, img in enumerate(images, 1):
                    body = fetch_image_bytes(session, img)
                    if body:
                        page_bytes[pn] = body
                        ok += 1
                        print(f" Fetched {pn}/{len(images)}", end="\r")
                    else:
                        print(f" {pn}/{len(images)} FAIL")
                    time.sleep(0.1)

                if not page_bytes:
                    print(f" No images fetched, skip")
                    continue

                # Upload to R2 first
                # (defaults bind slug/i now, avoiding late-binding in the closure)
                def upload_page(args, _slug=slug, _i=i):
                    pn, raw = args
                    r2_key = f"manga/{_slug}/chapters/{_i}/{pn}.webp"
                    webp = convert_to_webp(io.BytesIO(raw))
                    return pn, upload_to_r2(r2_key, webp)

                page_urls = {}
                done = 0
                with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
                    futures = {pool.submit(upload_page, (pn, raw)): pn for pn, raw in page_bytes.items()}
                    for future in as_completed(futures):
                        pn, r2_url = future.result()
                        page_urls[pn] = r2_url
                        done += 1
                        print(f" R2: {done}/{len(page_bytes)}", end="\r")

                if not page_urls:
                    print(f" R2 upload failed, skip")
                    continue

                # Only create DB records after R2 upload succeeds
                cur.execute(
                    'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
                    (manga_id, i, ch_name),
                )
                chapter_id = cur.fetchone()[0]
                for pn in sorted(page_urls):
                    cur.execute('INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)', (chapter_id, pn, page_urls[pn]))
                conn.commit()
                print(f" {len(page_urls)} pages synced" + " " * 20)

                time.sleep(REQUEST_DELAY)

            if new_count == 0:
                print(" Already up to date!")
            else:
                print(f" Synced {new_count} new chapters")

    try:
        with_browser(run)
    finally:
        conn.close()

    print("\nSync complete!")
|
|
|
|
|
|
def r2_list_prefixes():
    """Return the sorted manga slugs stored in R2 (top-level prefixes under manga/)."""
    found = set()
    pages = s3.get_paginator("list_objects_v2").paginate(
        Bucket=BUCKET, Prefix="manga/", Delimiter="/"
    )
    for page in pages:
        for entry in page.get("CommonPrefixes", []):
            # entry["Prefix"] looks like "manga/<slug>/"; keep only the slug segment.
            name = entry["Prefix"].split("/")[1]
            if name:
                found.add(name)
    return sorted(found)
|
|
|
|
|
|
def r2_count_by_prefix(prefix):
    """Return the number of R2 objects whose key starts with *prefix*."""
    paginator = s3.get_paginator("list_objects_v2")
    return sum(
        len(page.get("Contents", []))
        for page in paginator.paginate(Bucket=BUCKET, Prefix=prefix)
    )
|
|
|
|
|
|
def r2_delete_prefix(prefix):
    """Delete every R2 object under *prefix* in parallel batches; return the count deleted."""
    # Collect one delete batch per listing page (a page holds at most 1000 keys,
    # which also fits the delete_objects request limit).
    batches = []
    for page in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET, Prefix=prefix):
        contents = page.get("Contents", [])
        if contents:
            batches.append([{"Key": obj["Key"]} for obj in contents])

    def _drop(keys):
        # Bulk-delete one batch and report how many keys it held.
        s3.delete_objects(Bucket=BUCKET, Delete={"Objects": keys})
        return len(keys)

    total = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for n in pool.map(_drop, batches):
            total += n
            print(f" {total} deleted", end="\r")
    print(f" {total} objects deleted" + " " * 10)
    return total
|
|
|
|
|
|
# ── TUI ────────────────────────────────────────────────────
|
|
|
|
|
|
def tui_select(title, options, back=True, search=False):
    """Arrow-key menu. Returns selected index or -1."""
    entries = list(options)
    if back:
        # A trailing "[Back]" entry doubles as the cancel action.
        entries.append("[Back]")
    chosen = TerminalMenu(
        entries,
        title=title,
        search_key="/" if search else None,
        show_search_hint=search,
    ).show()
    if chosen is None:
        return -1  # Esc / Ctrl-C
    if back and chosen == len(entries) - 1:
        return -1  # "[Back]" picked
    return chosen
|
|
|
|
|
|
# slug -> display title memo, shared by get_manga_title() (and therefore
# manga_display_name()); populated lazily from detail.json or a batch DB load.
_title_cache = {}
|
|
|
|
def get_manga_title(slug):
    """Read manga title from detail.json or DB, fallback to slug.

    Lookup order: memoized ``_title_cache`` -> local ``detail.json`` ->
    one batch DB query that caches every known title. Returns the slug
    itself when no title can be found.
    """
    if slug in _title_cache:
        return _title_cache[slug]
    # Try local detail.json first
    detail_path = CONTENT_DIR / slug / "detail.json"
    if detail_path.exists():
        try:
            detail = json.loads(detail_path.read_text(encoding="utf-8"))
            title = detail.get("mg-title")
            if title:
                _title_cache[slug] = title
                return title
        except Exception:
            pass  # unreadable/invalid detail.json -> fall through to DB
    # Try database (batch load all titles so at most one query per miss)
    try:
        conn = get_db()
        try:
            cur = conn.cursor()
            cur.execute('SELECT slug, title FROM "Manga"')
            for row in cur.fetchall():
                _title_cache[row[0]] = row[1]
        finally:
            # Original leaked the connection when the query raised; always close.
            conn.close()
        if slug in _title_cache:
            return _title_cache[slug]
    except Exception:
        pass  # DB unavailable -> fall back to the slug
    return slug
|
|
|
|
|
|
def manga_display_name(slug):
    """Format: 'title (slug)' or just 'slug'."""
    title = get_manga_title(slug)
    return slug if title == slug else f"{title} ({slug})"
|
|
|
|
|
|
def tui_pick_manga_url(include_all=True):
    """Pick manga from manga.json. Shows title + slug."""
    urls = load_manga_urls()
    if not urls:
        print(" No URLs in manga.json")
        return None
    labels = [
        f"{n}. {manga_display_name(slug_from_url(url))}"
        for n, url in enumerate(urls, 1)
    ]
    if include_all:
        labels.insert(0, "All manga")
    choice = tui_select("Select manga (/ to search):", labels, search=True)
    if choice < 0:
        return None
    if include_all:
        # Index 0 is the "All manga" entry; real URLs are shifted by one.
        return "__all__" if choice == 0 else urls[choice - 1]
    return urls[choice]
|
|
|
|
|
|
def tui_pick_local(include_all=True):
    """Pick from local manga-content/. Shows title + slug."""
    local = list_local_manga()
    if not local:
        print(" No manga in manga-content/")
        return None
    labels = [f"{n}. {manga_display_name(name)}" for n, name in enumerate(local, 1)]
    if include_all:
        labels.insert(0, "All manga")
    choice = tui_select("Select manga (/ to search):", labels, search=True)
    if choice < 0:
        return None
    if include_all:
        # Index 0 is the "All manga" entry; real names are shifted by one.
        return "__all__" if choice == 0 else local[choice - 1]
    return local[choice]
|
|
|
|
|
|
def tui_pick_r2():
    """Pick manga from R2. Shows title + slug."""
    slugs = r2_list_prefixes()
    if not slugs:
        print(" R2 is empty")
        return None
    labels = [f"{n}. {manga_display_name(s)}" for n, s in enumerate(slugs, 1)]
    choice = tui_select("Select manga (/ to search):", labels, search=True)
    if choice < 0:
        return None
    return slugs[choice]
|
|
|
|
|
|
def tui_pick_chapters(chapters, slug=None):
    """Multi-select chapter picker. Space to toggle, Enter to confirm.

    Existing chapters shown grayed out. Returns a set of selected 1-based
    indices, None for "all chapters", or the string "back" on cancel.
    """
    # Check which chapters already exist locally
    existing = set()
    if slug:
        existing = get_existing_chapters(CONTENT_DIR / slug)

    # Compute the "already downloaded" flag once per chapter; the original
    # re-scanned `existing` twice per chapter (once to count, once to label).
    done_flags = [
        any(ch["chapterName"] in name for name in existing) for ch in chapters
    ]
    existing_count = sum(done_flags)

    idx = tui_select(f"{len(chapters)} chapters ({existing_count} downloaded)", [
        "All chapters (skip existing)",
        "Select chapters (space to toggle)",
    ])
    if idx == -1:
        return "back"
    if idx == 0:
        return None  # all

    items = []
    for i, (ch, done) in enumerate(zip(chapters, done_flags), 1):
        label = f"{i}. {ch['chapterName']}"
        if done:
            # Gray out already-downloaded chapters (ANSI bright-black).
            label = f"\033[90m{label} [done]\033[0m"
        items.append(label)

    menu = TerminalMenu(
        items,
        title="Space=toggle, Enter=confirm, /=search:",
        multi_select=True,
        show_multi_select_hint=True,
        search_key="/",
        show_search_hint=True,
    )
    selected = menu.show()
    if selected is None:
        return "back"
    if isinstance(selected, int):
        # simple-term-menu returns a bare int when exactly one entry is chosen
        selected = (selected,)
    return {i + 1 for i in selected}  # 1-based
|
|
|
|
|
|
def tui_download():
    """Interactive download: pick a manga, pick chapters, then run cmd_download."""
    picked = tui_pick_manga_url()
    if not picked:
        return
    if picked == "__all__":
        cmd_download()
        return

    slug = slug_from_url(picked)
    print(f"\n Fetching chapters for {slug}...")

    def get_chapters(session):
        # Load the manga page (ignoring navigation errors), wait out
        # Cloudflare, then pull the chapter list via the API.
        try:
            session.page.goto(f"{BASE_URL}/manga/{slug}", wait_until="commit", timeout=60000)
        except Exception:
            pass
        if not wait_for_cloudflare(session):
            return None
        return fetch_chapters_via_api(session.page, slug)

    chapters = with_browser(get_chapters)
    if not chapters:
        print(" Could not get chapters")
        return

    result = tui_pick_chapters(chapters, slug=slug)
    if result != "back":
        cmd_download(picked, chapter_set=result)
|
|
|
|
|
|
def tui_upload():
    """Interactive upload: pick a local manga (or all) and run cmd_upload."""
    choice = tui_pick_local()
    if not choice:
        return
    if choice == "__all__":
        cmd_upload()
        return
    cmd_upload(choice)
|
|
|
|
|
|
def tui_sync():
    """Interactive sync: pick a manga URL (or all) and run cmd_sync."""
    choice = tui_pick_manga_url()
    if not choice:
        return
    if choice == "__all__":
        cmd_sync()
        return
    cmd_sync(choice)
|
|
|
|
|
|
def _r2_db_status():
    """Print per-manga object counts from R2 and row counts from the DB."""
    # Count R2 objects in single pass over the whole bucket.
    slug_counts = {}
    total = 0
    for pg in s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET):
        for obj in pg.get("Contents", []):
            total += 1
            parts = obj["Key"].split("/")
            if len(parts) >= 2 and parts[0] == "manga":
                slug_counts[parts[1]] = slug_counts.get(parts[1], 0) + 1
    print(f"\n R2: {total} objects, {len(slug_counts)} manga")
    for slug in sorted(slug_counts):
        print(f" {manga_display_name(slug)}: {slug_counts[slug]} objects")
    try:
        conn = get_db()
        try:
            cur = conn.cursor()
            cur.execute('SELECT COUNT(*) FROM "Manga"')
            mc = cur.fetchone()[0]
            cur.execute('SELECT COUNT(*) FROM "Chapter"')
            cc = cur.fetchone()[0]
            cur.execute('SELECT COUNT(*) FROM "Page"')
            pc = cur.fetchone()[0]
            print(f" DB: {mc} manga, {cc} chapters, {pc} pages")
        finally:
            # Original leaked the connection when a query raised; always close.
            conn.close()
    except Exception as e:
        print(f" DB: {e}")
    input("\n Press ENTER...")


def _db_delete_manga(slug):
    """Delete one manga (pages, chapters, manga row) from the DB by slug."""
    conn = get_db()
    try:
        cur = conn.cursor()
        cur.execute('SELECT id FROM "Manga" WHERE slug = %s', (slug,))
        row = cur.fetchone()
        if row:
            mid = row[0]
            # Delete children first: Page -> Chapter -> Manga.
            cur.execute('DELETE FROM "Page" WHERE "chapterId" IN (SELECT id FROM "Chapter" WHERE "mangaId" = %s)', (mid,))
            cur.execute('DELETE FROM "Chapter" WHERE "mangaId" = %s', (mid,))
            cur.execute('DELETE FROM "Manga" WHERE id = %s', (mid,))
            conn.commit()
            print(f" Removed from R2 + DB")
    finally:
        # Original only closed inside `if row:` and not on errors; always close.
        conn.close()


def _db_clear_all():
    """Delete every row from Page, Chapter and Manga."""
    conn = get_db()
    try:
        cur = conn.cursor()
        for t in ['"Page"', '"Chapter"', '"Manga"']:
            cur.execute(f"DELETE FROM {t}")
        conn.commit()
    finally:
        conn.close()
    print(" All cleared")


def tui_r2_manage():
    """R2 / DB management submenu: status, delete one manga, or clear everything."""
    while True:
        idx = tui_select("R2 / DB Management", [
            "Status",
            "Delete specific manga",
            "Clear ALL (R2 + DB)",
        ])
        if idx == -1:
            break

        elif idx == 0:
            _r2_db_status()

        elif idx == 1:
            picked = tui_pick_r2()
            if not picked:
                continue
            confirm = input(f" Delete {picked} from R2 + DB? [y/N] ").strip().lower()
            if confirm == "y":
                r2_delete_prefix(f"manga/{picked}/")
                try:
                    _db_delete_manga(picked)
                except Exception as e:
                    print(f" DB error: {e}")

        elif idx == 2:
            confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower()
            if confirm == "y":
                # Empty prefix wipes the entire bucket, not just manga/ keys.
                r2_delete_prefix("")
                try:
                    _db_clear_all()
                except Exception as e:
                    print(f" DB error: {e}")
|
|
|
|
|
|
def main():
    """Top-level menu loop; dispatches to the per-command TUI handlers."""
    actions = {
        0: cmd_setup,
        1: tui_download,
        2: tui_upload,
        3: tui_sync,
        4: tui_r2_manage,
    }
    while True:
        idx = tui_select("Manga Toolkit", [
            "Setup (solve Cloudflare)",
            "Download",
            "Upload (local -> R2)",
            "Sync (site -> R2)",
            "R2 / DB management",
            "Quit",
        ], back=False)

        # Exit on Esc/None, cancel, or the explicit "Quit" entry (index 5).
        if idx is None or idx == -1 or idx == 5:
            break
        actions[idx]()

    print("Bye!")
|
|
|
|
|
|
# Entry point: launch the interactive TUI when run as a script.
if __name__ == "__main__":
    main()
|