# sunnymh-manga-dl/upload.py

"""
Interactive manga uploader — Cloudflare R2 + PostgreSQL.

R2 storage layout:
    manga/<slug>/cover.webp
    manga/<slug>/chapters/<number>/<page>.webp

Usage:
    python upload.py
"""

import io
import json
import os
import re
import sys
from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path

import boto3
import psycopg2
from PIL import Image
from dotenv import load_dotenv

# Load settings from a local .env file (python-dotenv) before the
# os.environ lookups below run.
load_dotenv()

# Manga sources are read from the "manga-content" directory that sits next to
# this script.
ROOT_DIR = Path(__file__).parent
CONTENT_DIR = ROOT_DIR / "manga-content"

# R2 config
# Every os.environ[...] lookup below raises KeyError at import time when the
# variable is not set, so a misconfigured environment fails before any work.
s3 = boto3.client(
    "s3",
    endpoint_url=f"https://{os.environ['R2_ACCOUNT_ID']}.r2.cloudflarestorage.com",
    aws_access_key_id=os.environ["R2_ACCESS_KEY"],
    aws_secret_access_key=os.environ["R2_SECRET_KEY"],
    region_name="auto",
)
BUCKET = os.environ["R2_BUCKET"]
# Trailing slashes are stripped so URLs can be built as f"{PUBLIC_URL}/{key}".
PUBLIC_URL = os.environ["R2_PUBLIC_URL"].rstrip("/")

# Database
DATABASE_URL = os.environ["DATABASE_URL"]
# Size of the thread pools used for parallel page uploads and batch deletes.
UPLOAD_WORKERS = 8


def convert_to_webp(image_path, quality=80):
    """Re-encode an image file as WebP and return the encoded bytes.

    Args:
        image_path: Path of the source image (any format Pillow can open).
        quality: WebP quality setting passed to Pillow's encoder.

    Returns:
        The WebP-encoded image as ``bytes``.
    """
    buf = io.BytesIO()
    # The context manager closes the underlying file handle; this function is
    # called once per page from several worker threads, so leaked handles
    # would accumulate quickly.
    with Image.open(image_path) as img:
        img.save(buf, format="WEBP", quality=quality)
    return buf.getvalue()


def make_cover(image_path, width=400, height=560, quality=85):
    """Build a fixed-size WebP cover from an image file.

    The source is cropped to the ``width / height`` aspect ratio and then
    resized with LANCZOS resampling. Images that are too wide are cropped
    symmetrically around the horizontal centre; images that are too tall keep
    their top edge and lose the bottom.

    Args:
        image_path: Path of the source cover image.
        width: Output width in pixels.
        height: Output height in pixels.
        quality: WebP quality setting passed to Pillow's encoder.

    Returns:
        The WebP-encoded cover as ``bytes``.
    """
    target_ratio = width / height
    buf = io.BytesIO()
    # Keep all pixel work inside the ``with`` so the source file is still open
    # while it is read, and reliably closed afterwards.
    with Image.open(image_path) as src:
        if src.width / src.height > target_ratio:
            # Too wide: trim equal amounts from the left and right.
            # max(1, ...) keeps the crop box from collapsing to zero pixels
            # on extreme aspect ratios.
            new_width = max(1, int(src.height * target_ratio))
            left = (src.width - new_width) // 2
            box = (left, 0, left + new_width, src.height)
        else:
            # Too tall (or already matching): keep the top, drop the bottom.
            new_height = max(1, int(src.width / target_ratio))
            box = (0, 0, src.width, new_height)
        cover = src.crop(box).resize((width, height), Image.LANCZOS)
        cover.save(buf, format="WEBP", quality=quality)
    return buf.getvalue()


def upload_to_r2(key, data, content_type="image/webp"):
    """Store *data* in the R2 bucket under *key* and return its public URL.

    Args:
        key: Object key inside the bucket.
        data: Object body to upload.
        content_type: Value stored as the object's Content-Type.

    Returns:
        The public URL of the object, built from ``PUBLIC_URL`` and *key*.
    """
    request = {
        "Bucket": BUCKET,
        "Key": key,
        "Body": data,
        "ContentType": content_type,
    }
    s3.put_object(**request)
    public_url = f"{PUBLIC_URL}/{key}"
    return public_url


def r2_key_exists(key):
    """Return True when an object with *key* exists in the R2 bucket.

    Args:
        key: Object key inside the bucket.

    Returns:
        True if a HEAD request succeeds, False if the service reports that
        the object was not found.

    Raises:
        botocore ClientError: for any failure other than "not found" (for
            example access denied or throttling), so that such problems are
            not mistaken for a missing object.
    """
    try:
        s3.head_object(Bucket=BUCKET, Key=key)
    except s3.exceptions.ClientError as err:
        code = str(err.response.get("Error", {}).get("Code", ""))
        # HEAD responses carry no body, so a missing key normally surfaces as
        # the bare status "404"; the named codes are accepted as well.
        if code in ("404", "NoSuchKey", "NotFound"):
            return False
        raise
    return True


def get_db():
    """Open a PostgreSQL connection to ``DATABASE_URL`` using UTF8 client encoding."""
    connection = psycopg2.connect(DATABASE_URL)
    connection.set_client_encoding("UTF8")
    return connection


def parse_chapter_dir(dir_name):
    """Split a chapter directory name into (order number, title).

    Example: '1 001. 序章' -> (1, '001. 序章').

    Names that do not start with digits followed by whitespace and a title
    are returned unchanged with an order number of 0.
    """
    parsed = re.match(r"^(\d+)\s+(.+)$", dir_name)
    if parsed is None:
        return 0, dir_name
    number_text, title = parsed.groups()
    return int(number_text), title


def list_local_manga():
    """List manga directories in manga-content/.

    Returns:
        Sorted names of the non-hidden sub-directories of ``CONTENT_DIR``.
        An empty list is returned when ``CONTENT_DIR`` itself does not exist,
        so callers can report "no manga" instead of crashing.
    """
    if not CONTENT_DIR.is_dir():
        return []
    return sorted(
        entry.name
        for entry in CONTENT_DIR.iterdir()
        if entry.is_dir() and not entry.name.startswith(".")
    )


# ── Commands ──────────────────────────────────────────────


def cmd_reset():
    """Delete every object in the R2 bucket and report the outcome.

    Keys are listed page by page, then each page is removed with one
    ``delete_objects`` call; the calls run in parallel on a thread pool.
    Objects that the service reports as failed are not counted as deleted and
    are listed afterwards.
    """
    print("\nClearing R2 bucket...")
    paginator = s3.get_paginator("list_objects_v2")
    # One batch per listing page; pages without "Contents" are skipped.
    batches = [
        [{"Key": obj["Key"]} for obj in page["Contents"]]
        for page in paginator.paginate(Bucket=BUCKET)
        if page.get("Contents")
    ]

    # Delete batches in parallel
    def delete_batch(keys):
        response = s3.delete_objects(Bucket=BUCKET, Delete={"Objects": keys})
        # A successful HTTP response can still contain per-object failures.
        errors = response.get("Errors", [])
        return len(keys) - len(errors), errors

    total = 0
    failures = []
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        for count, errors in pool.map(delete_batch, batches):
            total += count
            failures.extend(errors)
            print(f"  {total} deleted", end="\r")

    # Trailing spaces overwrite what is left of the "\r" progress line.
    print(f"  {total} objects deleted from R2" + " " * 10)
    if failures:
        print(f"  {len(failures)} objects could not be deleted:")
        for err in failures[:10]:
            print(f"    {err.get('Key')}: {err.get('Code')} {err.get('Message', '')}")
        if len(failures) > 10:
            print(f"    ... and {len(failures) - 10} more")
        print("R2 only partially cleared. Run the reset again to retry.\n")
        return
    print("R2 cleared. Run 'upload' to re-upload.\n")


def cmd_status(conn):
    """Show current state of R2, the database and the local content directory.

    Args:
        conn: Open psycopg2 connection. It is left open; the read-only
            transaction used for the queries is ended before returning.
    """
    # ``with conn`` ends the transaction when the block exits (commit on
    # success, rollback on error) so the session is not left idle inside a
    # transaction while the menu waits for input; ``with cursor`` closes the
    # cursor.
    with conn, conn.cursor() as cur:
        # DB counts
        cur.execute('SELECT COUNT(*) FROM "Manga"')
        manga_count = cur.fetchone()[0]
        cur.execute('SELECT COUNT(*) FROM "Chapter"')
        chapter_count = cur.fetchone()[0]
        cur.execute('SELECT COUNT(*) FROM "Page"')
        page_count = cur.fetchone()[0]

        # Manga in DB with their chapter counts
        cur.execute('SELECT slug, title, (SELECT COUNT(*) FROM "Chapter" WHERE "mangaId" = "Manga".id) FROM "Manga" ORDER BY slug')
        manga_rows = cur.fetchall()

    print(f"\n  Database: {manga_count} manga, {chapter_count} chapters, {page_count} pages")
    for slug, title, ch_count in manga_rows:
        print(f"    {slug}: {title} ({ch_count} chapters)")

    # R2 count
    paginator = s3.get_paginator("list_objects_v2")
    total = sum(
        len(page.get("Contents", []))
        for page in paginator.paginate(Bucket=BUCKET)
    )
    print(f"  R2: {total} objects")

    # Local
    local = list_local_manga()
    print(f"  Local: {len(local)} manga in manga-content/")
    for name in local:
        manga_path = CONTENT_DIR / name
        chapters = [
            d for d in manga_path.iterdir()
            if d.is_dir() and not d.name.startswith(".")
        ]
        has_cover = (manga_path / "cover.jpg").exists()
        print(f"    {name}: {len(chapters)} chapters, cover: {'yes' if has_cover else 'no'}")
    print()


def cmd_upload(conn, manga_name=None):
    """Upload one manga, or every local manga, to R2 and the database.

    Args:
        conn: Open database connection, passed through to ``upload_manga``.
        manga_name: Directory name under manga-content/ to upload. When
            omitted (or empty) every directory from ``list_local_manga`` is
            uploaded.
    """
    if not manga_name:
        targets = list_local_manga()
    elif (CONTENT_DIR / manga_name).is_dir():
        targets = [manga_name]
    else:
        print(f"  Not found: {CONTENT_DIR / manga_name}")
        return

    if not targets:
        print("  No manga found in manga-content/")
        return

    print(f"\n  Uploading {len(targets)} manga(s)...")

    for target in targets:
        upload_manga(target, conn)

    print("\nUpload complete!")


def _page_sort_key(page_file):
    """Sort key for page files: the first run of digits in the stem, or 0."""
    found = re.search(r"(\d+)", page_file.stem)
    return int(found.group(1)) if found else 0


def _sync_cover(manga_path, slug):
    """Make sure the cover is in R2; return its public URL, or '' without cover.jpg."""
    cover_file = manga_path / "cover.jpg"
    if not cover_file.exists():
        print("  No cover.jpg")
        return ""

    cover_key = f"manga/{slug}/cover.webp"
    if r2_key_exists(cover_key):
        print("  Cover exists")
        return f"{PUBLIC_URL}/{cover_key}"

    cover_url = upload_to_r2(cover_key, make_cover(cover_file))
    print("  Cover uploaded")
    return cover_url


def _upload_pages(slug, order_num, page_files):
    """Convert and upload a chapter's pages in parallel; return {page number: URL}."""

    def process_page(numbered_file):
        j, page_file = numbered_file
        r2_key = f"manga/{slug}/chapters/{order_num}/{j}.webp"
        # Objects already in R2 are reused, which makes re-runs cheap.
        if r2_key_exists(r2_key):
            return j, f"{PUBLIC_URL}/{r2_key}"
        return j, upload_to_r2(r2_key, convert_to_webp(page_file))

    page_urls = {}
    with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
        futures = [
            pool.submit(process_page, item)
            for item in enumerate(page_files, 1)
        ]
        for done, future in enumerate(as_completed(futures), 1):
            # result() re-raises any exception from the worker thread.
            j, url = future.result()
            page_urls[j] = url
            print(f"      {done}/{len(page_files)}", end="\r")
    return page_urls


def upload_manga(manga_name, conn):
    """Upload one manga directory to R2 and create its database records.

    Reads ``detail.json`` for metadata, uploads the cover, creates or updates
    the "Manga" row, then processes every chapter directory whose name starts
    with a non-zero number. Chapters that already have a "Chapter" row are
    skipped.

    For each new chapter the pages are uploaded first, and only then are the
    "Chapter" row and its "Page" rows inserted in a single transaction. A
    failed upload therefore never leaves a page-less chapter row behind that
    later runs would skip.

    Args:
        manga_name: Directory name under manga-content/; also used as slug.
        conn: Open psycopg2 connection.
    """
    manga_path = CONTENT_DIR / manga_name
    detail_path = manga_path / "detail.json"

    if not detail_path.exists():
        print(f"  Skipping {manga_name}: no detail.json")
        return

    detail = json.loads(detail_path.read_text(encoding="utf-8"))
    title = detail.get("mg-title", manga_name)
    slug = manga_name
    genres = detail.get("mg-genres", [])
    description = detail.get("mg-description", "")
    if not description and genres:
        description = f"Genres: {', '.join(genres)}"
    genre = genres[0] if genres else "Drama"

    print(f"\n  {'='*50}")
    print(f"  {title} ({slug})")
    print(f"  {'='*50}")

    # Cover
    cover_url = _sync_cover(manga_path, slug)

    with conn.cursor() as cur:
        # Manga record
        cur.execute('SELECT id, "coverUrl" FROM "Manga" WHERE slug = %s', (slug,))
        row = cur.fetchone()

        if row:
            manga_id, existing_cover = row
            print(f"  Manga exists (id: {manga_id})")
            if cover_url and cover_url != existing_cover:
                cur.execute(
                    'UPDATE "Manga" SET "coverUrl" = %s, "updatedAt" = NOW() WHERE id = %s',
                    (cover_url, manga_id),
                )
                conn.commit()
        else:
            cur.execute(
                """
                INSERT INTO "Manga" (title, description, "coverUrl", slug, genre, status, "createdAt", "updatedAt")
                VALUES (%s, %s, %s, %s, %s, 'PUBLISHED', NOW(), NOW())
                RETURNING id
                """,
                (title, description, cover_url, slug, genre),
            )
            manga_id = cur.fetchone()[0]
            conn.commit()
            print(f"  Created manga (id: {manga_id})")

        # Chapters
        chapter_dirs = sorted(
            (d for d in manga_path.iterdir() if d.is_dir() and not d.name.startswith(".")),
            key=lambda d: parse_chapter_dir(d.name)[0],
        )

        for chapter_dir in chapter_dirs:
            order_num, chapter_title = parse_chapter_dir(chapter_dir.name)
            if order_num == 0:
                # Directory name has no usable chapter number.
                continue

            cur.execute(
                'SELECT id FROM "Chapter" WHERE "mangaId" = %s AND number = %s',
                (manga_id, order_num),
            )
            if cur.fetchone():
                print(f"    [{order_num}] {chapter_title} — skip")
                continue

            page_files = sorted(
                (
                    f for f in chapter_dir.iterdir()
                    if f.suffix.lower() in (".jpg", ".jpeg", ".png", ".webp")
                ),
                key=_page_sort_key,
            )

            if not page_files:
                continue

            print(f"    [{order_num}] {chapter_title} ({len(page_files)} pages)")

            # Only SELECTs have run since the last commit; end that read
            # transaction so the session is not idle-in-transaction during
            # the potentially long upload.
            conn.rollback()

            page_urls = _upload_pages(slug, order_num, page_files)

            # Chapter and page rows are written atomically: ``with conn``
            # commits on success and rolls back if any insert fails.
            with conn:
                cur.execute(
                    'INSERT INTO "Chapter" ("mangaId", number, title) VALUES (%s, %s, %s) RETURNING id',
                    (manga_id, order_num, chapter_title),
                )
                chapter_id = cur.fetchone()[0]
                cur.executemany(
                    'INSERT INTO "Page" ("chapterId", number, "imageUrl") VALUES (%s, %s, %s)',
                    [(chapter_id, j, page_urls[j]) for j in sorted(page_urls)],
                )

            # Trailing spaces overwrite what is left of the "\r" progress line.
            print(f"      {len(page_files)} pages uploaded" + " " * 10)


# ── Interactive loop ──────────────────────────────────────


def show_menu():
    """Print the main menu: a framed title followed by the numbered actions."""
    rule = "=" * 40
    lines = [
        "",
        rule,
        "  Manga Uploader",
        rule,
        "  1. Status",
        "  2. Upload all manga",
        "  3. Upload specific manga",
        "  4. Reset R2 storage",
        "  0. Quit",
        "",
    ]
    print("\n".join(lines))


def main():
    """Run the interactive menu loop until the user quits.

    Opens one database connection for the whole session and closes it on
    exit, including when a command raises. EOF or Ctrl-C at the main prompt
    quits; at a follow-up prompt it cancels that action and returns to the
    menu.
    """

    def ask(prompt):
        """Read a stripped line from the user; return None on EOF or Ctrl-C."""
        try:
            return input(prompt).strip()
        except (EOFError, KeyboardInterrupt):
            print()
            return None

    conn = get_db()
    try:
        while True:
            show_menu()
            choice = ask("Select [0-4]: ")
            if choice is None or choice == "0":
                break

            if choice == "1":
                cmd_status(conn)
            elif choice == "2":
                cmd_upload(conn)
            elif choice == "3":
                local = list_local_manga()
                if not local:
                    print("  No manga in manga-content/")
                    continue
                print()
                for i, name in enumerate(local, 1):
                    print(f"  {i}. {name}")
                print()
                pick = ask("Select manga number: ")
                if pick is None:
                    continue
                # Only the int() conversion is guarded: a ValueError raised
                # inside the upload itself (for example a malformed
                # detail.json) must not be reported as bad menu input.
                try:
                    idx = int(pick) - 1
                except ValueError:
                    print("  Invalid input")
                    continue
                if 0 <= idx < len(local):
                    cmd_upload(conn, local[idx])
                else:
                    print("  Invalid selection")
            elif choice == "4":
                confirm = ask("  Delete ALL R2 objects? [y/N] ")
                if confirm is not None and confirm.lower() == "y":
                    cmd_reset()
                else:
                    print("  Cancelled.")
            else:
                print("  Invalid choice")
    finally:
        conn.close()

    print("Bye!")


# Start the interactive menu only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    main()