# sunnymh-manga-dl/upload.py
# 2026-04-11 16:55:13 +08:00
#
# 394 lines
# 12 KiB
# Python

"""
Interactive manga uploader — Cloudflare R2 + PostgreSQL.

R2 storage layout:
    manga/<slug>/cover.webp
    manga/<slug>/chapters/<number>/<page>.webp

Usage:
    python upload.py
"""
import io
import json
import os
import re
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
import boto3
import psycopg2
from PIL import Image
from dotenv import load_dotenv
# Load variables from a local .env file (python-dotenv) before the
# os.environ lookups below; each lookup raises KeyError at import time if
# its variable is unset.
load_dotenv()

# Directory holding this script, and the folder of local manga to upload.
ROOT_DIR = Path(__file__).parent
CONTENT_DIR = ROOT_DIR / "manga-content"

# R2 config
s3 = boto3.client(
    "s3",
    endpoint_url=f"https://{os.environ['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com",
    aws_access_key_id=os.environ["R2_ACCESS_KEY"],
    aws_secret_access_key=os.environ["R2_SECRET_KEY"],
    region_name="auto",
)
BUCKET = os.environ["R2_BUCKET"]
# Trailing slash is stripped so URLs can be built as f"{PUBLIC_URL}/{key}".
PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/")

# Database
DATABASE_URL = os.environ["DATABASE_URL"]

# Thread count used by the pools that upload pages and delete R2 batches.
UPLOAD_WORKERS = 8
def convert_to_webp(image_path, quality=80):
    """Encode the image at ``image_path`` as WebP and return the bytes.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        quality: Quality setting forwarded to Pillow's WebP encoder.

    Returns:
        bytes: The WebP-encoded image data.
    """
    buf = io.BytesIO()
    # Image.open keeps the source file open until the image is closed; this
    # function is called from several worker threads at once, so release the
    # handle deterministically instead of waiting for garbage collection.
    with Image.open(image_path) as img:
        img.save(buf, format="WEBP", quality=quality)
    return buf.getvalue()
def make_cover(image_path, width=400, height=560, quality=85):
    """Build a fixed-size WebP cover from the image at ``image_path``.

    The source is first cropped to the ``width / height`` aspect ratio:
    images that are too wide are cropped symmetrically around the horizontal
    centre, images that are too tall keep their top edge and lose the bottom.
    The crop is then resized to exactly ``width`` x ``height``.

    Args:
        image_path: Path (or file object) accepted by ``PIL.Image.open``.
        width: Output width in pixels.
        height: Output height in pixels.
        quality: Quality setting forwarded to Pillow's WebP encoder.

    Returns:
        bytes: The WebP-encoded cover.
    """
    target_ratio = width / height
    # The context manager closes the source file as soon as the resized copy
    # exists; the resized image no longer depends on the open file.
    with Image.open(image_path) as src:
        if src.width / src.height > target_ratio:
            # Too wide: keep full height, trim equally from left and right.
            new_width = int(src.height * target_ratio)
            left = (src.width - new_width) // 2
            box = (left, 0, left + new_width, src.height)
        else:
            # Too tall (or exact): keep full width, anchored at the top edge.
            new_height = int(src.width / target_ratio)
            box = (0, 0, src.width, new_height)
        cover = src.crop(box).resize((width, height), Image.LANCZOS)

    buf = io.BytesIO()
    cover.save(buf, format="WEBP", quality=quality)
    return buf.getvalue()
def upload_to_r2(key, data, content_type="image/webp"):
    """Store ``data`` in the R2 bucket under ``key`` and return its public URL.

    Args:
        key: Object key inside ``BUCKET``.
        data: Object body handed to ``put_object``.
        content_type: Value stored as the object's ``ContentType``.

    Returns:
        str: ``PUBLIC_URL`` joined with ``key`` by a single slash.
    """
    put_kwargs = {
        "Bucket": BUCKET,
        "Key": key,
        "Body": data,
        "ContentType": content_type,
    }
    s3.put_object(**put_kwargs)
    public_url = f"{PUBLIC_URL}/{key}"
    return public_url
def r2_key_exists(key):
    """Report whether ``head_object`` succeeds for ``key`` in the bucket.

    Returns:
        bool: True when the HEAD request succeeds. Any ``ClientError`` — not
        only a "not found" response — is reported as False.
    """
    try:
        s3.head_object(Bucket=BUCKET, Key=key)
    except s3.exceptions.ClientError:
        return False
    else:
        return True
def get_db():
    """Open a psycopg2 connection to ``DATABASE_URL`` using UTF8 client encoding.

    Returns:
        The open connection; the caller is responsible for closing it.
    """
    connection = psycopg2.connect(DATABASE_URL)
    connection.set_client_encoding("UTF8")
    return connection
def parse_chapter_dir(dir_name):
    """Split a chapter directory name into ``(order, title)``.

    Example: '1 001. 序章' -> (1, '001. 序章').

    A name that does not consist of leading digits, whitespace and a
    non-empty remainder is returned unchanged with order 0.
    """
    matched = re.match(r"^(\d+)\s+(.+)$", dir_name)
    if matched is None:
        return 0, dir_name
    order_text, title = matched.groups()
    return int(order_text), title
def list_local_manga():
    """List manga directories in manga-content/.

    Returns:
        list[str]: Sorted names of the non-hidden sub-directories of
        ``CONTENT_DIR``; an empty list when ``CONTENT_DIR`` does not exist.
    """
    # Every caller already handles an empty list ("No manga found ..."), so
    # a missing content folder should look like "nothing to upload" rather
    # than crash the menu with FileNotFoundError from iterdir().
    if not CONTENT_DIR.is_dir():
        return []
    return sorted(
        entry.name
        for entry in CONTENT_DIR.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    )
# ── Commands ──────────────────────────────────────────────
def cmd_reset():
    """Delete every object in the R2 bucket and report how many were removed.

    Only R2 is touched; no database rows are changed here.

    NOTE(review): upload_manga skips any chapter that already has a row in
    "Chapter", so pages of such chapters are not re-uploaded after a reset
    unless those rows are removed as well.
    """
    print("\nClearing R2 bucket...")

    # Collect the whole listing first so pagination never walks a bucket that
    # is being modified. Each listing page becomes one delete_objects batch.
    batches = []
    paginator = s3.get_paginator("list_objects_v2")
    for page in paginator.paginate(Bucket=BUCKET):
        objects = page.get("Contents", [])
        if objects:
            batches.append([{"Key": obj["Key"]} for obj in objects])

    def delete_batch(keys):
        """Delete one batch; return (number deleted, per-key error entries)."""
        response = s3.delete_objects(Bucket=BUCKET, Delete={"Objects": keys})
        # delete_objects can succeed as a call while individual keys fail;
        # those are listed under "Errors" and must not be counted as deleted.
        errors = response.get("Errors", [])
        return len(keys) - len(errors), errors

    total = 0
    failures = []
    # Delete batches in parallel
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for deleted, errors in pool.map(delete_batch, batches):
            total += deleted
            failures.extend(errors)
            print(f" {total} deleted", end="\r")

    print(f" {total} objects deleted from R2" + " " * 10)
    if failures:
        print(f" {len(failures)} objects could not be deleted:")
        # Cap the listing so a large failure does not flood the terminal.
        for err in failures[:10]:
            print(f" {err.get('Key')}: {err.get('Code')} {err.get('Message')}")
        return
    print("R2 cleared. Run 'upload' to re-upload.\n")
def cmd_status(conn):
    """Print a summary of the database, the R2 bucket and the local content.

    Args:
        conn: Open database connection used for the read-only queries.
    """
    cur = conn.cursor()

    # Row counts for the three tables, in the order they are reported.
    count_queries = (
        'SELECT COUNT(*) FROM "Manga"',
        'SELECT COUNT(*) FROM "Chapter"',
        'SELECT COUNT(*) FROM "Page"',
    )
    counts = []
    for query in count_queries:
        cur.execute(query)
        counts.append(cur.fetchone()[0])
    manga_count, chapter_count, page_count = counts
    print(f"\n Database: {manga_count} manga, {chapter_count} chapters, {page_count} pages")

    # One line per manga stored in the database, with its chapter count.
    cur.execute('SELECT slug, title, (SELECT COUNT(*) FROM "Chapter" WHERE "mangaId" = "Manga".id) FROM "Manga" ORDER BY slug')
    for row in cur.fetchall():
        slug, title, ch_count = row
        print(f" {slug}: {title} ({ch_count} chapters)")

    # Number of objects currently in the bucket.
    pages = s3.get_paginator("list_objects_v2").paginate(Bucket=BUCKET)
    total = sum(len(listing.get("Contents", [])) for listing in pages)
    print(f" R2: {total} objects")

    # Local folders: chapter directory count and whether cover.jpg is present.
    local = list_local_manga()
    print(f" Local: {len(local)} manga in manga-content/")
    for name in local:
        manga_path = CONTENT_DIR / name
        chapter_total = sum(
            1
            for entry in manga_path.iterdir()
            if entry.is_dir() and not entry.name.startswith(".")
        )
        cover_flag = "yes" if (manga_path / "cover.jpg").exists() else "no"
        print(f" {name}: {chapter_total} chapters, cover: {cover_flag}")
    print()
def cmd_upload(conn, manga_name=None):
    """Upload one manga, or every local manga, to R2 and the database.

    Args:
        conn: Open database connection passed on to ``upload_manga``.
        manga_name: Directory name under ``CONTENT_DIR`` to upload; when
            falsy, every directory from ``list_local_manga()`` is uploaded.
    """
    if not manga_name:
        targets = list_local_manga()
        if not targets:
            print(" No manga found in manga-content/")
            return
    else:
        requested = CONTENT_DIR / manga_name
        if not requested.is_dir():
            print(f" Not found: {requested}")
            return
        targets = [manga_name]

    print(f"\n Uploading {len(targets)} manga(s)...")
    for target in targets:
        upload_manga(target, conn)
    print("\nUpload complete!")
def _page_sort_key(page_file):
    """Return the first integer in the file stem, or 0 when it has none."""
    found = re.search(r"(\d+)", page_file.stem)
    return int(found.group(1)) if found else 0


def _sync_cover(manga_path, slug):
    """Make sure manga/<slug>/cover.webp exists in R2; return its URL or ''."""
    cover_file = manga_path / "cover.jpg"
    if not cover_file.exists():
        print(" No cover.jpg")
        return ""
    cover_key = f"manga/{slug}/cover.webp"
    if r2_key_exists(cover_key):
        print(" Cover exists")
        return f"{PUBLIC_URL}/{cover_key}"
    cover_url = upload_to_r2(cover_key, make_cover(cover_file))
    print(" Cover uploaded")
    return cover_url


def _ensure_manga_record(conn, cur, slug, title, description, genre, cover_url):
    """Find or create the "Manga" row for ``slug`` and return its id."""
    cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
    row = cur.fetchone()
    if row:
        manga_id, existing_cover = row
        print(f" Manga exists (id: {manga_id})")
        # Only touch the row when there is a cover URL and it differs.
        if cover_url and cover_url != existing_cover:
            cur.execute(
                'UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                (cover_url, manga_id),
            )
            conn.commit()
        return manga_id

    cur.execute(
        """
        INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt")
        VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW())
        RETURNING id
        """,
        (title, description, cover_url, slug, genre),
    )
    manga_id = cur.fetchone()[0]
    conn.commit()
    print(f" Created manga (id: {manga_id})")
    return manga_id


def _upload_chapter_pages(slug, order_num, page_files):
    """Convert and upload a chapter's pages in parallel; return {page no: URL}."""

    def process_page(numbered_page):
        page_no, page_file = numbered_page
        r2_key = f"manga/{slug}/chapters/{order_num}/{page_no}.webp"
        # Keys already in R2 are reused, so an interrupted chapter resumes
        # where it stopped instead of re-encoding every page.
        if not r2_key_exists(r2_key):
            return page_no, upload_to_r2(r2_key, convert_to_webp(page_file))
        return page_no, f"{PUBLIC_URL}/{r2_key}"

    page_urls = {}
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        futures = [
            pool.submit(process_page, numbered)
            for numbered in enumerate(page_files, 1)
        ]
        for done, future in enumerate(as_completed(futures), 1):
            page_no, url = future.result()
            page_urls[page_no] = url
            print(f" {done}/{len(page_files)}", end="\r")
    return page_urls


def upload_manga(manga_name, conn):
    """Upload one manga directory to R2 and record it in the database.

    Reads ``detail.json`` (keys ``mg-title``, ``mg-genres``,
    ``mg-description``) from ``CONTENT_DIR / manga_name``, uploads the cover,
    finds or creates the "Manga" row, then processes each chapter directory
    whose name parses to a non-zero number and that has no "Chapter" row yet.

    A chapter's pages are uploaded to R2 *before* any row is written, and the
    "Chapter" row plus its "Page" rows are committed together. A failed
    upload therefore never leaves a page-less chapter that later runs would
    skip as already present.

    Args:
        manga_name: Directory name under ``CONTENT_DIR``; also used as slug.
        conn: Open psycopg2 connection; this function commits on it.

    Raises:
        Whatever the upload or database calls raise. Database errors while
        recording a chapter are rolled back before being re-raised.
    """
    manga_path = CONTENT_DIR / manga_name
    detail_path = manga_path / "detail.json"
    if not detail_path.exists():
        print(f" Skipping {manga_name}: no detail.json")
        return

    detail = json.loads(detail_path.read_text(encoding="utf-8"))
    title = detail.get("mg-title", manga_name)
    slug = manga_name
    genres = detail.get("mg-genres", [])
    description = detail.get("mg-description", "")
    if not description and genres:
        description = f"Genres: {', '.join(genres)}"
    genre = genres[0] if genres else "Drama"

    print(f"\n {'='*50}")
    print(f" {title} ({slug})")
    print(f" {'='*50}")

    cover_url = _sync_cover(manga_path, slug)

    chapter_dirs = sorted(
        [d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")],
        key=lambda d: parse_chapter_dir(d.name)[0],
    )
    image_suffixes = (".jpg", ".jpeg", ".png", ".webp")

    with conn.cursor() as cur:
        manga_id = _ensure_manga_record(
            conn, cur, slug, title, description, genre, cover_url
        )

        for chapter_dir in chapter_dirs:
            order_num, chapter_title = parse_chapter_dir(chapter_dir.name)
            if order_num == 0:
                # parse_chapter_dir yields 0 for names without a leading
                # number; such directories are not treated as chapters.
                continue

            cur.execute(
                'SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s',
                (manga_id, order_num),
            )
            already_recorded = cur.fetchone() is not None
            # End the transaction the SELECT opened so the connection is not
            # left idle inside a transaction during the slow uploads below.
            conn.commit()
            if already_recorded:
                print(f" [{order_num}] {chapter_title} — skip")
                continue

            page_files = sorted(
                [f for f in chapter_dir.iterdir() if f.suffix.lower() in image_suffixes],
                key=_page_sort_key,
            )
            if not page_files:
                continue
            print(f" [{order_num}] {chapter_title} ({len(page_files)} pages)")

            # Parallel convert + upload (no database writes yet).
            page_urls = _upload_chapter_pages(slug, order_num, page_files)

            # Record the chapter and all of its pages atomically.
            try:
                cur.execute(
                    'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
                    (manga_id, order_num, chapter_title),
                )
                chapter_id = cur.fetchone()[0]
                cur.executemany(
                    'INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)',
                    [(chapter_id, n, page_urls[n]) for n in sorted(page_urls)],
                )
                conn.commit()
            except Exception:
                conn.rollback()
                raise
            print(f" {len(page_files)} pages uploaded" + " " * 10)
# ── Interactive loop ──────────────────────────────────────
def show_menu():
    """Print the main menu, framed by a blank line above and below."""
    rule = "=" * 40
    menu_lines = [
        "",
        rule,
        " Manga Uploader",
        rule,
        " 1. Status",
        " 2. Upload all manga",
        " 3. Upload specific manga",
        " 4. Reset R2 storage",
        " 0. Quit",
        "",
    ]
    print("\n".join(menu_lines))
def main():
    """Run the interactive menu until the user quits.

    Opens one database connection for the whole session and always closes it
    on the way out. EOF (Ctrl-D) or Ctrl-C at *any* prompt ends the session
    cleanly; previously only the main menu prompt handled them, and the
    follow-up prompts crashed with a traceback.
    """

    def ask(prompt):
        """Return the stripped reply, or None on EOF / KeyboardInterrupt."""
        try:
            return input(prompt).strip()
        except (EOFError, KeyboardInterrupt):
            print()
            return None

    conn = get_db()
    try:
        while True:
            show_menu()
            choice = ask("Select [0-4]: ")
            if choice is None or choice == "0":
                break

            if choice == "1":
                cmd_status(conn)
            elif choice == "2":
                cmd_upload(conn)
            elif choice == "3":
                local = list_local_manga()
                if not local:
                    print(" No manga in manga-content/")
                    continue
                print()
                for i, name in enumerate(local, 1):
                    print(f" {i}. {name}")
                print()
                pick = ask("Select manga number: ")
                if pick is None:
                    break
                try:
                    idx = int(pick) - 1
                except ValueError:
                    print(" Invalid input")
                    continue
                if 0 <= idx < len(local):
                    cmd_upload(conn, local[idx])
                else:
                    print(" Invalid selection")
            elif choice == "4":
                confirm = ask(" Delete ALL R2 objects? [y/N] ")
                if confirm is None:
                    # Interrupting the confirmation never deletes anything.
                    break
                if confirm.lower() == "y":
                    cmd_reset()
                else:
                    print(" Cancelled.")
            else:
                print(" Invalid choice")
    finally:
        conn.close()
        print("Bye!")
# Start the interactive uploader only when this file is run as a script,
# not when it is imported.
if __name__ == "__main__":
    main()