Add ESC-to-stop, manga edit, R2 recompress, persistent browser session

- Persistent Chrome session: lazy-started, reused across all operations
  in one run (closed only on Quit). Eliminates per-command startup delay.
- ESC key gracefully stops Download/Upload/Sync after current chapter.
- Edit manga info: TUI form to update title/description/genre/status/cover
  in DB without re-syncing.
- R2 recompress: re-encode all webp images for a manga at quality 65 to
  reclaim storage. Skips files where new size is not smaller.
- Sync now refreshes title/description/genre on existing manga records,
  and saves all genres comma-separated (was only the first).
- Cover detection waits up to 8s for image to appear in DOM, with
  fallback selector.
- WebP encoding uses method=6 quality=75 for smaller files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
yiekheng 2026-04-12 10:17:06 +08:00
parent fab3b413b8
commit e037996c5c

407
manga.py
View File

@ -9,9 +9,14 @@ import io
import json import json
import os import os
import re import re
import select
import sys
import time import time
import socket import socket
import subprocess import subprocess
import termios
import threading
import tty
from concurrent.futures import ThreadPoolExecutor, as_completed from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path from pathlib import Path
from urllib.parse import urlparse from urllib.parse import urlparse
@ -50,6 +55,52 @@ PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/")
DATABASE_URL = os.environ["DATABASE_URL"] DATABASE_URL = os.environ["DATABASE_URL"]
# ── ESC listener ───────────────────────────────────────────
class EscListener:
    """Context manager: listens for ESC key in background, sets self.stop event."""

    def __init__(self):
        # Shared flag: set by the listener thread on ESC, or on context exit.
        self.stop = threading.Event()
        self._thread = None
        self._old = None  # saved termios settings; restored in __exit__
        self._fd = None

    def __enter__(self):
        # Non-interactive stdin (pipe, CI): no listener, flag stays clearable.
        if not sys.stdin.isatty():
            return self
        self._fd = sys.stdin.fileno()
        try:
            self._old = termios.tcgetattr(self._fd)
            tty.setcbreak(self._fd)  # char-at-a-time reads, signals still work
        except Exception:
            # Terminal refused cbreak — run without key detection.
            self._old = None
            return self
        self._thread = threading.Thread(target=self._listen, daemon=True)
        self._thread.start()
        return self

    def _listen(self):
        # Poll stdin with a short timeout so the thread notices teardown quickly.
        stdin = sys.stdin
        while not self.stop.is_set():
            try:
                ready = select.select([stdin], [], [], 0.2)[0]
                if not ready:
                    continue
                if stdin.read(1) == "\x1b":
                    self.stop.set()
                    print("\n ESC pressed — stopping after current item...")
                    return
            except Exception:
                return

    def __exit__(self, *args):
        self.stop.set()
        if self._old is not None:
            try:
                termios.tcsetattr(self._fd, termios.TCSADRAIN, self._old)
            except Exception:
                pass
# ── Chrome management ────────────────────────────────────── # ── Chrome management ──────────────────────────────────────
@ -126,14 +177,29 @@ class BrowserSession:
self.playwright.stop() self.playwright.stop()
_session_singleton = None


def get_session():
    """Return the shared Chrome session, lazily starting it on first use."""
    global _session_singleton
    if _session_singleton is None:
        session = BrowserSession()
        session.start()
        _session_singleton = session
    return _session_singleton


def close_session():
    """Tear down the shared Chrome session if one was started (called on exit)."""
    global _session_singleton
    session, _session_singleton = _session_singleton, None
    if session is not None:
        session.close()
def with_browser(func):
    """Run func(session) using the persistent Chrome session."""
    session = get_session()
    return func(session)
# ── Cloudflare ───────────────────────────────────────────── # ── Cloudflare ─────────────────────────────────────────────
@ -782,9 +848,7 @@ def upload_manga_to_r2(manga_name, conn):
slug = manga_name slug = manga_name
genres = detail.get("mg-genres", []) genres = detail.get("mg-genres", [])
description = detail.get("mg-description", "") description = detail.get("mg-description", "")
if not description and genres: genre = ", ".join(genres) if genres else "Drama"
description = f"Genres: {', '.join(genres)}"
genre = genres[0] if genres else "Drama"
cur = conn.cursor() cur = conn.cursor()
@ -885,25 +949,27 @@ def cmd_setup():
print(" 2. Any manga page") print(" 2. Any manga page")
print(" 3. Any reader page\n") print(" 3. Any reader page\n")
chrome_proc = launch_chrome(BASE_URL) session = get_session()
if not chrome_proc and not is_port_open(CDP_PORT): try:
print(" Failed to launch Chrome") session.page.goto(BASE_URL, wait_until="commit", timeout=60000)
return except Exception:
pass
# Bring Chrome to front for setup
try:
subprocess.Popen(
["osascript", "-e", 'tell application "Google Chrome" to activate'],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
)
except Exception:
pass
input(" Press ENTER when done... ") input(" Press ENTER when done... ")
try: cookies = session.browser.contexts[0].cookies()
with sync_playwright() as p: cf = [c for c in cookies if c["name"] == "cf_clearance"]
browser = p.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}") print(f" cf_clearance: {'found' if cf else 'NOT found'}")
cookies = browser.contexts[0].cookies() hide_chrome()
cf = [c for c in cookies if c["name"] == "cf_clearance"]
print(f" cf_clearance: {'found' if cf else 'NOT found'}")
browser.close()
except Exception as e:
print(f" Could not verify: {e}")
if chrome_proc:
chrome_proc.terminate()
print() print()
@ -914,37 +980,42 @@ def cmd_download(manga_url=None, chapter_set=None):
print(" No URLs in manga.json") print(" No URLs in manga.json")
return return
print(f"\n Downloading {len(urls)} manga(s)...\n") print(f"\n Downloading {len(urls)} manga(s)... (ESC to stop)\n")
def run(session): def run(session):
for url in urls: with EscListener() as esc:
slug = slug_from_url(url) for url in urls:
try: if esc.stop.is_set():
result = load_manga_page(session, slug) break
if not result: slug = slug_from_url(url)
continue try:
chapters, metadata, cover_body = result result = load_manga_page(session, slug)
if not chapters: if not result:
print(" No chapters found.")
continue
print(f" Found {len(chapters)} chapters")
save_manga_local(slug, metadata, cover_body)
existing = get_existing_chapters(CONTENT_DIR / slug)
for i, ch in enumerate(chapters, 1):
if chapter_set and i not in chapter_set:
continue continue
if any(ch["chapterName"] in name for name in existing): chapters, metadata, cover_body = result
print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip") if not chapters:
print(" No chapters found.")
continue continue
print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})") print(f" Found {len(chapters)} chapters")
download_chapter(session, slug, i, ch, CONTENT_DIR / slug) save_manga_local(slug, metadata, cover_body)
print(f"\n Done: {slug}")
except Exception as e: existing = get_existing_chapters(CONTENT_DIR / slug)
print(f"\n Error: {url}: {e}")
import traceback for i, ch in enumerate(chapters, 1):
traceback.print_exc() if esc.stop.is_set():
break
if chapter_set and i not in chapter_set:
continue
if any(ch["chapterName"] in name for name in existing):
print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip")
continue
print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})")
download_chapter(session, slug, i, ch, CONTENT_DIR / slug)
print(f"\n Done: {slug}")
except Exception as e:
print(f"\n Error: {url}: {e}")
import traceback
traceback.print_exc()
with_browser(run) with_browser(run)
print("\nDownload complete!") print("\nDownload complete!")
@ -959,14 +1030,17 @@ def cmd_upload(manga_name=None):
print(" No manga in manga-content/") print(" No manga in manga-content/")
return return
print(f"\n Uploading {len(names)} manga(s)...") print(f"\n Uploading {len(names)} manga(s)... (ESC to stop)")
conn = get_db() conn = get_db()
try: try:
for name in names: with EscListener() as esc:
print(f"\n {'='*50}") for name in names:
print(f" {name}") if esc.stop.is_set():
print(f" {'='*50}") break
upload_manga_to_r2(name, conn) print(f"\n {'='*50}")
print(f" {name}")
print(f" {'='*50}")
upload_manga_to_r2(name, conn)
finally: finally:
conn.close() conn.close()
print("\nUpload complete!") print("\nUpload complete!")
@ -982,7 +1056,10 @@ def cmd_sync(manga_url=None):
conn = get_db() conn = get_db()
def run(session): def run(session):
with EscListener() as esc:
for url in urls: for url in urls:
if esc.stop.is_set():
break
slug = slug_from_url(url) slug = slug_from_url(url)
print(f"\n{'='*60}") print(f"\n{'='*60}")
@ -1004,7 +1081,7 @@ def cmd_sync(manga_url=None):
title = metadata.get("mg-title", slug) title = metadata.get("mg-title", slug)
genres = metadata.get("mg-genres", []) genres = metadata.get("mg-genres", [])
description = metadata.get("mg-description", "") description = metadata.get("mg-description", "")
genre = genres[0] if genres else "Drama" genre = ", ".join(genres) if genres else "Drama"
# Cover → R2 (from RAM) # Cover → R2 (from RAM)
cover_url = "" cover_url = ""
@ -1021,9 +1098,21 @@ def cmd_sync(manga_url=None):
row = cur.fetchone() row = cur.fetchone()
if row: if row:
manga_id = row[0] manga_id = row[0]
# Refresh metadata fields (cover only updated if we have a new one)
if cover_url: if cover_url:
cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id)) cur.execute(
conn.commit() 'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
'"coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
(title, description, genre, cover_url, manga_id),
)
else:
cur.execute(
'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
'"updatedAt" = NOW() WHERE id = %s',
(title, description, genre, manga_id),
)
conn.commit()
print(f" Updated metadata (genre: {genre})")
else: else:
cur.execute( cur.execute(
'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") ' 'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
@ -1040,6 +1129,8 @@ def cmd_sync(manga_url=None):
new_count = 0 new_count = 0
for i, ch in enumerate(chapters, 1): for i, ch in enumerate(chapters, 1):
if esc.stop.is_set():
break
ch_name = ch["chapterName"] ch_name = ch["chapterName"]
if i in existing_numbers: if i in existing_numbers:
continue continue
@ -1161,6 +1252,51 @@ def r2_delete_prefix(prefix):
return total return total
def r2_recompress(slug, quality=65):
    """Download all webp images for a manga, re-encode at lower quality, re-upload."""
    prefix = f"manga/{slug}/"
    paginator = s3.get_paginator("list_objects_v2")
    keys = [
        obj["Key"]
        for page in paginator.paginate(Bucket=BUCKET, Prefix=prefix)
        for obj in page.get("Contents", [])
        if obj["Key"].endswith(".webp")
    ]
    if not keys:
        print(f" No webp files for {slug}")
        return
    print(f" {len(keys)} files to recompress (quality={quality})")

    def recompress_one(key):
        # Returns bytes saved (>= 0); -1 signals a failed item (best-effort).
        try:
            body = s3.get_object(Bucket=BUCKET, Key=key)["Body"].read()
            smaller = _to_webp_bytes(Image.open(io.BytesIO(body)), quality=quality)
            gain = len(body) - len(smaller)
            if gain > 0:
                s3.put_object(Bucket=BUCKET, Key=key, Body=smaller, ContentType="image/webp")
                return gain
            return 0  # re-encode did not shrink the file; keep the original
        except Exception:
            return -1

    saved_total = 0
    failed = 0
    done = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for saved in pool.map(recompress_one, keys):
            done += 1
            if saved < 0:
                failed += 1
            else:
                saved_total += saved
            print(f" {done}/{len(keys)} — saved {saved_total // 1024} KB", end="\r")
    msg = f" Done: {done}/{len(keys)} processed, {saved_total // (1024 * 1024)} MB saved"
    if failed:
        msg += f" ({failed} failed)"
    print(msg + " " * 10)
# ── TUI ──────────────────────────────────────────────────── # ── TUI ────────────────────────────────────────────────────
@ -1373,12 +1509,89 @@ def tui_sync():
cmd_sync(picked) cmd_sync(picked)
def tui_edit_manga():
    """Edit manga metadata (title, description, genre, status, cover) in DB.

    Shows a searchable picker over all manga rows, then a field-edit loop.
    Changes are written back only on "Save & exit"; "Discard & exit" (or
    backing out of the menu) leaves the row untouched.
    """
    try:
        conn = get_db()
    except Exception as e:
        print(f" DB error: {e}")
        return
    # Fix: original leaked the connection when a DB error occurred after
    # get_db() succeeded, and on any unexpected exception mid-edit; the
    # try/finally guarantees conn.close() on every exit path.
    try:
        try:
            cur = conn.cursor()
            cur.execute('SELECT slug, title FROM "Manga" ORDER BY title')
            rows = cur.fetchall()
        except Exception as e:
            print(f" DB error: {e}")
            return
        if not rows:
            print(" No manga in DB")
            return
        items = [f"{i+1}. {title} ({slug})" for i, (slug, title) in enumerate(rows)]
        sel = tui_select("Select manga to edit (/ to search):", items, search=True)
        if sel < 0:
            return
        slug, _ = rows[sel]
        cur.execute('SELECT id, title, description, genre, status, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
        row = cur.fetchone()
        if not row:
            print(" Not found")
            return
        mid, title, description, genre, status, cover_url = row
        while True:
            print(f"\n Editing: {slug}")
            print(f" title: {title}")
            print(f" description: {(description or '')[:80]}{'...' if description and len(description) > 80 else ''}")
            print(f" genre: {genre}")
            print(f" status: {status}")
            print(f" coverUrl: {cover_url}")
            idx = tui_select("Edit field", [
                "title", "description", "genre", "status", "coverUrl",
                "Save & exit", "Discard & exit",
            ])
            if idx == -1 or idx == 6:
                # Backed out, or explicit discard: drop in-memory edits.
                print(" Discarded.")
                break
            if idx == 5:
                # Single UPDATE writes all fields at once, then commit.
                cur.execute(
                    'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
                    'status = %s, "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                    (title, description, genre, status, cover_url, mid),
                )
                conn.commit()
                print(" Saved.")
                break
            if idx == 3:
                # status is a closed set — pick from a menu, not free text.
                opts = ["PUBLISHED", "DRAFT", "HIDDEN"]
                s_idx = tui_select("Status:", opts)
                if s_idx >= 0:
                    status = opts[s_idx]
            else:
                field_name = ["title", "description", "genre", "status", "coverUrl"][idx]
                current = {"title": title, "description": description or "", "genre": genre, "coverUrl": cover_url or ""}[field_name]
                print(f" Current: {current}")
                new_val = input(f" New {field_name} (empty=keep): ").strip()
                if new_val:
                    if idx == 0:
                        title = new_val
                    elif idx == 1:
                        description = new_val
                    elif idx == 2:
                        genre = new_val
                    elif idx == 4:
                        cover_url = new_val
    finally:
        conn.close()
def tui_r2_manage(): def tui_r2_manage():
while True: while True:
idx = tui_select("R2 / DB Management", [ idx = tui_select("R2 / DB Management", [
"Status", "Status",
"Edit manga info",
"Delete specific manga", "Delete specific manga",
"Clear ALL (R2 + DB)", "Clear ALL (R2 + DB)",
"Recompress manga (quality 65)",
]) ])
if idx == -1: if idx == -1:
break break
@ -1412,6 +1625,9 @@ def tui_r2_manage():
input("\n Press ENTER...") input("\n Press ENTER...")
elif idx == 1: elif idx == 1:
tui_edit_manga()
elif idx == 2:
picked = tui_pick_r2() picked = tui_pick_r2()
if not picked: if not picked:
continue continue
@ -1434,7 +1650,7 @@ def tui_r2_manage():
except Exception as e: except Exception as e:
print(f" DB error: {e}") print(f" DB error: {e}")
elif idx == 2: elif idx == 3:
confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower() confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower()
if confirm == "y": if confirm == "y":
r2_delete_prefix("") r2_delete_prefix("")
@ -1449,32 +1665,51 @@ def tui_r2_manage():
except Exception as e: except Exception as e:
print(f" DB error: {e}") print(f" DB error: {e}")
elif idx == 4:
slugs = r2_list_prefixes()
if not slugs:
print(" R2 is empty")
continue
items = ["All manga"] + [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]
sel = tui_select("Recompress which? (quality=65, overwrites originals)", items, search=True)
if sel < 0:
continue
targets = slugs if sel == 0 else [slugs[sel - 1]]
confirm = input(f" Recompress {len(targets)} manga to quality 65? [y/N] ").strip().lower()
if confirm != "y":
continue
for slug in targets:
print(f"\n {manga_display_name(slug)}")
r2_recompress(slug, quality=65)
def main():
    """Top-level TUI loop; always closes the shared Chrome session on exit."""
    # Menu index -> handler; Quit/back handled before dispatch.
    handlers = {
        0: cmd_setup,
        1: tui_download,
        2: tui_upload,
        3: tui_sync,
        4: tui_r2_manage,
    }
    try:
        while True:
            choice = tui_select("Manga Toolkit", [
                "Setup (solve Cloudflare)",
                "Download",
                "Upload (local -> R2)",
                "Sync (site -> R2)",
                "R2 / DB management",
                "Quit",
            ], back=False)
            if choice is None or choice in (-1, 5):
                break
            action = handlers.get(choice)
            if action is not None:
                action()
    finally:
        close_session()
        print("Bye!")
if __name__ == "__main__": if __name__ == "__main__":