Add ESC-to-stop, manga edit, R2 recompress, persistent browser session

- Persistent Chrome session: lazy-started, reused across all operations
  in one run (closed only on Quit). Eliminates per-command startup delay.
- ESC key gracefully stops Download/Upload/Sync after current chapter.
- Edit manga info: TUI form to update title/description/genre/status/cover
  in DB without re-syncing.
- R2 recompress: re-encode all webp images for a manga at quality 65 to
  reclaim storage. Skips files where new size is not smaller.
- Sync now refreshes title/description/genre on existing manga records,
  and saves all genres comma-separated (was only the first).
- Cover detection waits up to 8s for image to appear in DOM, with
  fallback selector.
- WebP encoding uses method=6 quality=75 for smaller files.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
yiekheng 2026-04-12 10:17:06 +08:00
parent fab3b413b8
commit e037996c5c

407
manga.py
View File

@ -9,9 +9,14 @@ import io
import json
import os
import re
import select
import sys
import time
import socket
import subprocess
import termios
import threading
import tty
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
from urllib.parse import urlparse
@ -50,6 +55,52 @@ PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/")
DATABASE_URL = os.environ["DATABASE_URL"]
# ── ESC listener ───────────────────────────────────────────
class EscListener:
    """Context manager that watches stdin for a lone ESC key on a background
    thread and sets ``self.stop`` (a ``threading.Event``) when it is seen.

    On a non-tty stdin (pipes, CI) or when terminal setup fails, it degrades
    to a no-op: ``stop`` is still usable but is only set on ``__exit__``.
    """

    def __init__(self):
        self.stop = threading.Event()  # set when ESC is seen, or on exit
        self._thread = None            # background listener thread, if started
        self._old = None               # saved termios attrs to restore on exit
        self._fd = None                # stdin file descriptor

    def __enter__(self):
        if not sys.stdin.isatty():
            # No terminal to listen on — degrade to no-op.
            return self
        self._fd = sys.stdin.fileno()
        try:
            self._old = termios.tcgetattr(self._fd)
            tty.setcbreak(self._fd)  # single-key, unbuffered reads
        except Exception:
            # Terminal setup failed: don't start the thread, nothing to restore.
            self._old = None
            return self
        self._thread = threading.Thread(target=self._listen, daemon=True)
        self._thread.start()
        return self

    def _listen(self):
        # Poll stdin with a short timeout so stop.set() is noticed quickly.
        while not self.stop.is_set():
            try:
                r, _, _ = select.select([sys.stdin], [], [], 0.2)
                if r and sys.stdin.read(1) == "\x1b":
                    self.stop.set()
                    print("\n ESC pressed — stopping after current item...")
                    return
            except Exception:
                # stdin closed or interpreter shutting down: stop listening.
                return

    def __exit__(self, *args):
        self.stop.set()
        # Fix: join the listener before restoring the terminal so the
        # background thread cannot consume a keystroke typed after exit.
        # The 0.2s select timeout bounds how long the join can take.
        if self._thread is not None:
            self._thread.join(timeout=0.5)
        if self._old is not None:
            try:
                termios.tcsetattr(self._fd, termios.TCSADRAIN, self._old)
            except Exception:
                pass
# ── Chrome management ──────────────────────────────────────
@ -126,14 +177,29 @@ class BrowserSession:
self.playwright.stop()
# Lazily-created shared Chrome session; lives until close_session().
_session_singleton = None


def get_session():
    """Return the shared Chrome session, starting it on first use."""
    global _session_singleton
    if _session_singleton is None:
        _session_singleton = BrowserSession()
        _session_singleton.start()
    return _session_singleton


def close_session():
    """Tear down the shared Chrome session, if one was ever started."""
    global _session_singleton
    if _session_singleton is None:
        return
    _session_singleton.close()
    _session_singleton = None
def with_browser(func):
    """Call *func* with the persistent Chrome session and return its result.

    The session is lazily created by get_session() and intentionally NOT
    closed here — it is reused across commands and torn down on program exit.
    """
    session = get_session()
    return func(session)
# ── Cloudflare ─────────────────────────────────────────────
@ -782,9 +848,7 @@ def upload_manga_to_r2(manga_name, conn):
slug = manga_name
genres = detail.get("mg-genres", [])
description = detail.get("mg-description", "")
if not description and genres:
description = f"Genres: {', '.join(genres)}"
genre = genres[0] if genres else "Drama"
genre = ", ".join(genres) if genres else "Drama"
cur = conn.cursor()
@ -885,25 +949,27 @@ def cmd_setup():
print(" 2. Any manga page")
print(" 3. Any reader page\n")
chrome_proc = launch_chrome(BASE_URL)
if not chrome_proc and not is_port_open(CDP_PORT):
print(" Failed to launch Chrome")
return
session = get_session()
try:
session.page.goto(BASE_URL, wait_until="commit", timeout=60000)
except Exception:
pass
# Bring Chrome to front for setup
try:
subprocess.Popen(
["osascript", "-e", 'tell application "Google Chrome" to activate'],
stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL,
)
except Exception:
pass
input(" Press ENTER when done... ")
try:
with sync_playwright() as p:
browser = p.chromium.connect_over_cdp(f"http://localhost:{CDP_PORT}")
cookies = browser.contexts[0].cookies()
cf = [c for c in cookies if c["name"] == "cf_clearance"]
print(f" cf_clearance: {'found' if cf else 'NOT found'}")
browser.close()
except Exception as e:
print(f" Could not verify: {e}")
if chrome_proc:
chrome_proc.terminate()
cookies = session.browser.contexts[0].cookies()
cf = [c for c in cookies if c["name"] == "cf_clearance"]
print(f" cf_clearance: {'found' if cf else 'NOT found'}")
hide_chrome()
print()
@ -914,37 +980,42 @@ def cmd_download(manga_url=None, chapter_set=None):
print(" No URLs in manga.json")
return
print(f"\n Downloading {len(urls)} manga(s)...\n")
print(f"\n Downloading {len(urls)} manga(s)... (ESC to stop)\n")
def run(session):
for url in urls:
slug = slug_from_url(url)
try:
result = load_manga_page(session, slug)
if not result:
continue
chapters, metadata, cover_body = result
if not chapters:
print(" No chapters found.")
continue
print(f" Found {len(chapters)} chapters")
save_manga_local(slug, metadata, cover_body)
existing = get_existing_chapters(CONTENT_DIR / slug)
for i, ch in enumerate(chapters, 1):
if chapter_set and i not in chapter_set:
with EscListener() as esc:
for url in urls:
if esc.stop.is_set():
break
slug = slug_from_url(url)
try:
result = load_manga_page(session, slug)
if not result:
continue
if any(ch["chapterName"] in name for name in existing):
print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip")
chapters, metadata, cover_body = result
if not chapters:
print(" No chapters found.")
continue
print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})")
download_chapter(session, slug, i, ch, CONTENT_DIR / slug)
print(f"\n Done: {slug}")
except Exception as e:
print(f"\n Error: {url}: {e}")
import traceback
traceback.print_exc()
print(f" Found {len(chapters)} chapters")
save_manga_local(slug, metadata, cover_body)
existing = get_existing_chapters(CONTENT_DIR / slug)
for i, ch in enumerate(chapters, 1):
if esc.stop.is_set():
break
if chapter_set and i not in chapter_set:
continue
if any(ch["chapterName"] in name for name in existing):
print(f" [{i}/{len(chapters)}] {ch['chapterName']} — skip")
continue
print(f" [{i}/{len(chapters)}] {ch['chapterName']} (id={ch['id']})")
download_chapter(session, slug, i, ch, CONTENT_DIR / slug)
print(f"\n Done: {slug}")
except Exception as e:
print(f"\n Error: {url}: {e}")
import traceback
traceback.print_exc()
with_browser(run)
print("\nDownload complete!")
@ -959,14 +1030,17 @@ def cmd_upload(manga_name=None):
print(" No manga in manga-content/")
return
print(f"\n Uploading {len(names)} manga(s)...")
print(f"\n Uploading {len(names)} manga(s)... (ESC to stop)")
conn = get_db()
try:
for name in names:
print(f"\n {'='*50}")
print(f" {name}")
print(f" {'='*50}")
upload_manga_to_r2(name, conn)
with EscListener() as esc:
for name in names:
if esc.stop.is_set():
break
print(f"\n {'='*50}")
print(f" {name}")
print(f" {'='*50}")
upload_manga_to_r2(name, conn)
finally:
conn.close()
print("\nUpload complete!")
@ -982,7 +1056,10 @@ def cmd_sync(manga_url=None):
conn = get_db()
def run(session):
with EscListener() as esc:
for url in urls:
if esc.stop.is_set():
break
slug = slug_from_url(url)
print(f"\n{'='*60}")
@ -1004,7 +1081,7 @@ def cmd_sync(manga_url=None):
title = metadata.get("mg-title", slug)
genres = metadata.get("mg-genres", [])
description = metadata.get("mg-description", "")
genre = genres[0] if genres else "Drama"
genre = ", ".join(genres) if genres else "Drama"
# Cover → R2 (from RAM)
cover_url = ""
@ -1021,9 +1098,21 @@ def cmd_sync(manga_url=None):
row = cur.fetchone()
if row:
manga_id = row[0]
# Refresh metadata fields (cover only updated if we have a new one)
if cover_url:
cur.execute('UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s', (cover_url, manga_id))
conn.commit()
cur.execute(
'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
'"coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
(title, description, genre, cover_url, manga_id),
)
else:
cur.execute(
'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
'"updatedAt" = NOW() WHERE id = %s',
(title, description, genre, manga_id),
)
conn.commit()
print(f" Updated metadata (genre: {genre})")
else:
cur.execute(
'INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt") '
@ -1040,6 +1129,8 @@ def cmd_sync(manga_url=None):
new_count = 0
for i, ch in enumerate(chapters, 1):
if esc.stop.is_set():
break
ch_name = ch["chapterName"]
if i in existing_numbers:
continue
@ -1161,6 +1252,51 @@ def r2_delete_prefix(prefix):
return total
def r2_recompress(slug, quality=65):
    """Re-encode every .webp under ``manga/<slug>/`` in R2 at *quality*.

    Each object is downloaded, re-encoded, and uploaded back only when the
    re-encoded version is strictly smaller. Per-file failures are counted
    and reported at the end rather than raised. Prints a live progress line.
    """
    prefix = f"manga/{slug}/"
    paginator = s3.get_paginator("list_objects_v2")
    webp_keys = [
        obj["Key"]
        for page in paginator.paginate(Bucket=BUCKET, Prefix=prefix)
        for obj in page.get("Contents", [])
        if obj["Key"].endswith(".webp")
    ]
    if not webp_keys:
        print(f" No webp files for {slug}")
        return
    print(f" {len(webp_keys)} files to recompress (quality={quality})")

    def _shrink(key):
        # Returns bytes saved (>= 0); -1 signals a failure on this key.
        try:
            body = s3.get_object(Bucket=BUCKET, Key=key)["Body"].read()
            smaller = _to_webp_bytes(Image.open(io.BytesIO(body)), quality=quality)
            gain = len(body) - len(smaller)
            if gain <= 0:
                # New encoding is not smaller — keep the original object.
                return 0
            s3.put_object(Bucket=BUCKET, Key=key, Body=smaller, ContentType="image/webp")
            return gain
        except Exception:
            return -1

    processed = 0
    failed = 0
    saved_total = 0
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for gain in pool.map(_shrink, webp_keys):
            processed += 1
            if gain < 0:
                failed += 1
            else:
                saved_total += gain
            print(f" {processed}/{len(webp_keys)} — saved {saved_total // 1024} KB", end="\r")
    summary = f" Done: {processed}/{len(webp_keys)} processed, {saved_total // (1024 * 1024)} MB saved"
    if failed:
        summary += f" ({failed} failed)"
    print(summary + " " * 10)
# ── TUI ────────────────────────────────────────────────────
@ -1373,12 +1509,89 @@ def tui_sync():
cmd_sync(picked)
def tui_edit_manga():
    """Interactive editor for one manga row in the DB.

    Lets the user pick a manga, then repeatedly edit title / description /
    genre / status / coverUrl in memory until they save (a single UPDATE)
    or discard. The DB connection is always closed, including on error.
    """
    try:
        conn = get_db()
        cur = conn.cursor()
        cur.execute('SELECT slug, title FROM "Manga" ORDER BY title')
        rows = cur.fetchall()
    except Exception as e:
        print(f" DB error: {e}")
        return
    try:
        if not rows:
            print(" No manga in DB")
            return
        items = [f"{i+1}. {title} ({slug})" for i, (slug, title) in enumerate(rows)]
        sel = tui_select("Select manga to edit (/ to search):", items, search=True)
        # Fix: tui_select may return None (main() guards for it) — `None < 0`
        # would raise TypeError here, so treat None like "back".
        if sel is None or sel < 0:
            return
        slug, _ = rows[sel]
        cur.execute('SELECT id, title, description, genre, status, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
        row = cur.fetchone()
        if not row:
            print(" Not found")
            return
        mid, title, description, genre, status, cover_url = row
        while True:
            print(f"\n Editing: {slug}")
            print(f" title: {title}")
            print(f" description: {(description or '')[:80]}{'...' if description and len(description) > 80 else ''}")
            print(f" genre: {genre}")
            print(f" status: {status}")
            print(f" coverUrl: {cover_url}")
            idx = tui_select("Edit field", [
                "title", "description", "genre", "status", "coverUrl",
                "Save & exit", "Discard & exit",
            ])
            # Fix: also treat None as "discard" — indexing with None below
            # would otherwise raise TypeError.
            if idx is None or idx == -1 or idx == 6:
                print(" Discarded.")
                break
            if idx == 5:
                # Save & exit: write all edited fields in one UPDATE.
                cur.execute(
                    'UPDATE "Manga" SET title = %s, description = %s, genre = %s, '
                    'status = %s, "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                    (title, description, genre, status, cover_url, mid),
                )
                conn.commit()
                print(" Saved.")
                break
            if idx == 3:  # status: closed set of options, not free text
                opts = ["PUBLISHED", "DRAFT", "HIDDEN"]
                s_idx = tui_select("Status:", opts)
                if s_idx is not None and s_idx >= 0:
                    status = opts[s_idx]
            else:  # free-text fields; empty input keeps the current value
                field_name = ["title", "description", "genre", "status", "coverUrl"][idx]
                current = {"title": title, "description": description or "", "genre": genre, "coverUrl": cover_url or ""}[field_name]
                print(f" Current: {current}")
                new_val = input(f" New {field_name} (empty=keep): ").strip()
                if new_val:
                    if idx == 0:
                        title = new_val
                    elif idx == 1:
                        description = new_val
                    elif idx == 2:
                        genre = new_val
                    elif idx == 4:
                        cover_url = new_val
    finally:
        # Robustness fix: previously an exception inside the loop leaked
        # the connection; now it is closed on every path.
        conn.close()
def tui_r2_manage():
while True:
idx = tui_select("R2 / DB Management", [
"Status",
"Edit manga info",
"Delete specific manga",
"Clear ALL (R2 + DB)",
"Recompress manga (quality 65)",
])
if idx == -1:
break
@ -1412,6 +1625,9 @@ def tui_r2_manage():
input("\n Press ENTER...")
elif idx == 1:
tui_edit_manga()
elif idx == 2:
picked = tui_pick_r2()
if not picked:
continue
@ -1434,7 +1650,7 @@ def tui_r2_manage():
except Exception as e:
print(f" DB error: {e}")
elif idx == 2:
elif idx == 3:
confirm = input(" Delete ALL R2 + DB? [y/N] ").strip().lower()
if confirm == "y":
r2_delete_prefix("")
@ -1449,32 +1665,51 @@ def tui_r2_manage():
except Exception as e:
print(f" DB error: {e}")
elif idx == 4:
slugs = r2_list_prefixes()
if not slugs:
print(" R2 is empty")
continue
items = ["All manga"] + [f"{i+1}. {manga_display_name(s)}" for i, s in enumerate(slugs)]
sel = tui_select("Recompress which? (quality=65, overwrites originals)", items, search=True)
if sel < 0:
continue
targets = slugs if sel == 0 else [slugs[sel - 1]]
confirm = input(f" Recompress {len(targets)} manga to quality 65? [y/N] ").strip().lower()
if confirm != "y":
continue
for slug in targets:
print(f"\n {manga_display_name(slug)}")
r2_recompress(slug, quality=65)
def main():
    """Top-level TUI loop.

    The persistent Chrome session is started lazily by whichever command
    first needs it; the try/finally guarantees it is torn down exactly once,
    even if a command raises or the user interrupts.
    """
    try:
        while True:
            idx = tui_select("Manga Toolkit", [
                "Setup (solve Cloudflare)",
                "Download",
                "Upload (local -> R2)",
                "Sync (site -> R2)",
                "R2 / DB management",
                "Quit",
            ], back=False)
            if idx is None or idx == -1 or idx == 5:
                break
            elif idx == 0:
                cmd_setup()
            elif idx == 1:
                tui_download()
            elif idx == 2:
                tui_upload()
            elif idx == 3:
                tui_sync()
            elif idx == 4:
                tui_r2_manage()
    finally:
        # Chrome persists across commands; close it exactly once here.
        close_session()
    print("Bye!")
if __name__ == "__main__":