Re-upload inline during check, add URL fallback for flaky CDN
- Check missing pages: the re-upload now happens immediately when a count mismatch is detected, while the browser is still on that chapter's reader page. This eliminates the two-phase approach, which required re-navigating and could send the wrong referrer.
- fetch_image_bytes falls back to the URL with its query string stripped (e.g. drops ?q=50) when the original fails — this works around CDN variants that may not have the quality-scaled version.
- Log the specific failing URL on fetch failure for diagnostics.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
723b82c9fc
commit
051b2e191f
129
manga.py
129
manga.py
@ -581,12 +581,9 @@ def fetch_all_pages(page, images, max_attempts=3):
|
|||||||
return page_bytes
|
return page_bytes
|
||||||
|
|
||||||
|
|
||||||
def fetch_image_bytes(page, img):
|
def _fetch_via_page(page, url, ref_policy):
|
||||||
"""Fetch image via browser network stack using given page."""
|
|
||||||
url = img["url"]
|
|
||||||
ref_policy = "no-referrer" if img.get("no_referrer") else "origin"
|
|
||||||
try:
|
try:
|
||||||
with page.expect_response(lambda r: url in r.url, timeout=15000) as resp_info:
|
with page.expect_response(lambda r: url.split("?")[0] in r.url, timeout=15000) as resp_info:
|
||||||
page.evaluate("([u, r]) => fetch(u, { referrerPolicy: r })", [url, ref_policy])
|
page.evaluate("([u, r]) => fetch(u, { referrerPolicy: r })", [url, ref_policy])
|
||||||
response = resp_info.value
|
response = resp_info.value
|
||||||
if response.status == 200:
|
if response.status == 200:
|
||||||
@ -598,6 +595,25 @@ def fetch_image_bytes(page, img):
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_image_bytes(page, img):
|
||||||
|
"""Fetch image via browser network stack using given page. Tries URL variants on failure."""
|
||||||
|
url = img["url"]
|
||||||
|
ref_policy = "no-referrer" if img.get("no_referrer") else "origin"
|
||||||
|
|
||||||
|
# Try original URL
|
||||||
|
body = _fetch_via_page(page, url, ref_policy)
|
||||||
|
if body:
|
||||||
|
return body
|
||||||
|
|
||||||
|
# Fallback: strip query string (e.g., ?q=50)
|
||||||
|
if "?" in url:
|
||||||
|
body = _fetch_via_page(page, url.split("?")[0], ref_policy)
|
||||||
|
if body:
|
||||||
|
return body
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
def download_image(page, img, save_path):
|
def download_image(page, img, save_path):
|
||||||
"""Fetch image and save to disk."""
|
"""Fetch image and save to disk."""
|
||||||
if save_path.exists():
|
if save_path.exists():
|
||||||
@ -1715,8 +1731,8 @@ def tui_check_missing_pages():
|
|||||||
return
|
return
|
||||||
|
|
||||||
cur2 = conn.cursor()
|
cur2 = conn.cursor()
|
||||||
to_reupload = []
|
fixed_dims = 0
|
||||||
to_fix_dims = []
|
reuploaded = 0
|
||||||
|
|
||||||
print(f"\n Checking {len(selected_chapters)} chapters...")
|
print(f"\n Checking {len(selected_chapters)} chapters...")
|
||||||
for ch_id, ch_num, ch_title in selected_chapters:
|
for ch_id, ch_num, ch_title in selected_chapters:
|
||||||
@ -1740,8 +1756,42 @@ def tui_check_missing_pages():
|
|||||||
r2_count = r2_count_by_prefix(f"manga/{slug}/chapters/{ch_num}/")
|
r2_count = r2_count_by_prefix(f"manga/{slug}/chapters/{ch_num}/")
|
||||||
|
|
||||||
if site_count != r2_count:
|
if site_count != r2_count:
|
||||||
print(f" [{ch_num}] {ch_title}: site={site_count}, R2={r2_count} — re-upload")
|
print(f" [{ch_num}] {ch_title}: site={site_count}, R2={r2_count} — re-uploading...")
|
||||||
to_reupload.append((ch_id, ch_num, ch_title, ch, images))
|
# Re-upload IMMEDIATELY while browser is on this chapter's reader page
|
||||||
|
page_bytes = fetch_all_pages(session.page, images)
|
||||||
|
if len(page_bytes) < len(images):
|
||||||
|
missing = [pn for pn in range(1, len(images) + 1) if pn not in page_bytes]
|
||||||
|
print(f" Could not fetch pages: {missing}")
|
||||||
|
for mn in missing:
|
||||||
|
print(f" page {mn}: {images[mn-1]['url']}")
|
||||||
|
print(f" Skipping chapter")
|
||||||
|
continue
|
||||||
|
|
||||||
|
def upload_page(args, _slug=slug, _n=ch_num):
|
||||||
|
pn, raw = args
|
||||||
|
r2_key = f"manga/{_slug}/chapters/{_n}/{pn}.webp"
|
||||||
|
with Image.open(io.BytesIO(raw)) as img:
|
||||||
|
w, h = img.width, img.height
|
||||||
|
return pn, upload_to_r2(r2_key, convert_to_webp(io.BytesIO(raw))), w, h
|
||||||
|
|
||||||
|
page_urls = {}
|
||||||
|
done = 0
|
||||||
|
with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
|
||||||
|
for pn, r2_url, w, h in pool.map(upload_page, page_bytes.items()):
|
||||||
|
page_urls[pn] = (r2_url, w, h)
|
||||||
|
done += 1
|
||||||
|
print(f" R2: {done}/{len(page_bytes)}", end="\r")
|
||||||
|
|
||||||
|
cur2.execute('DELETE FROM "Page" WHERE "chapterId" = %s', (ch_id,))
|
||||||
|
for pn in sorted(page_urls):
|
||||||
|
url, w, h = page_urls[pn]
|
||||||
|
cur2.execute(
|
||||||
|
'INSERT INTO "Page" ("chapterId", number, "imageUrl", width, height) VALUES (%s, %s, %s, %s, %s)',
|
||||||
|
(ch_id, pn, url, w, h),
|
||||||
|
)
|
||||||
|
conn.commit()
|
||||||
|
reuploaded += 1
|
||||||
|
print(f" {len(page_urls)} pages restored" + " " * 20)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Count matches — check if DB has valid width/height for all pages
|
# Count matches — check if DB has valid width/height for all pages
|
||||||
@ -1756,19 +1806,9 @@ def tui_check_missing_pages():
|
|||||||
db_count, bad_w, bad_h, min_w, max_w, min_h, max_h = cur2.fetchone()
|
db_count, bad_w, bad_h, min_w, max_w, min_h, max_h = cur2.fetchone()
|
||||||
bad_count = max(bad_w, bad_h)
|
bad_count = max(bad_w, bad_h)
|
||||||
if bad_count > 0:
|
if bad_count > 0:
|
||||||
print(f" [{ch_num}] {ch_title}: {bad_count} pages need dims (w {min_w}-{max_w}, h {min_h}-{max_h}) — fix from R2")
|
print(f" [{ch_num}] {ch_title}: {bad_count} pages need dims — fixing from R2...")
|
||||||
to_fix_dims.append((ch_id, ch_num, ch_title))
|
|
||||||
else:
|
|
||||||
print(f" [{ch_num}] {ch_title}: {site_count} pages OK (w {min_w}-{max_w}, h {min_h}-{max_h})")
|
|
||||||
|
|
||||||
# Fix dimensions by reading existing R2 objects (no re-upload)
|
|
||||||
if to_fix_dims:
|
|
||||||
print(f"\n Fixing dimensions for {len(to_fix_dims)} chapter(s)...")
|
|
||||||
for ch_id, ch_num, ch_title in to_fix_dims:
|
|
||||||
if esc.stop.is_set():
|
|
||||||
break
|
|
||||||
cur2.execute(
|
cur2.execute(
|
||||||
'SELECT id, number, "imageUrl" FROM "Page" WHERE "chapterId" = %s '
|
'SELECT id, number FROM "Page" WHERE "chapterId" = %s '
|
||||||
'AND (width IS NULL OR width = 0 OR height IS NULL OR height = 0) '
|
'AND (width IS NULL OR width = 0 OR height IS NULL OR height = 0) '
|
||||||
'ORDER BY number',
|
'ORDER BY number',
|
||||||
(ch_id,),
|
(ch_id,),
|
||||||
@ -1776,7 +1816,7 @@ def tui_check_missing_pages():
|
|||||||
pages = cur2.fetchall()
|
pages = cur2.fetchall()
|
||||||
|
|
||||||
def read_dims(args, _slug=slug, _n=ch_num):
|
def read_dims(args, _slug=slug, _n=ch_num):
|
||||||
page_id, pn, _url = args
|
page_id, pn = args
|
||||||
r2_key = f"manga/{_slug}/chapters/{_n}/{pn}.webp"
|
r2_key = f"manga/{_slug}/chapters/{_n}/{pn}.webp"
|
||||||
try:
|
try:
|
||||||
data = s3.get_object(Bucket=BUCKET, Key=r2_key)["Body"].read()
|
data = s3.get_object(Bucket=BUCKET, Key=r2_key)["Body"].read()
|
||||||
@ -1795,47 +1835,12 @@ def tui_check_missing_pages():
|
|||||||
)
|
)
|
||||||
updated += 1
|
updated += 1
|
||||||
conn.commit()
|
conn.commit()
|
||||||
print(f" [{ch_num}] {ch_title}: {updated}/{len(pages)} dims updated")
|
fixed_dims += 1
|
||||||
|
print(f" {updated}/{len(pages)} dims updated")
|
||||||
|
else:
|
||||||
|
print(f" [{ch_num}] {ch_title}: {site_count} pages OK (w {min_w}-{max_w}, h {min_h}-{max_h})")
|
||||||
|
|
||||||
if not to_reupload:
|
print(f"\n Done: {reuploaded} re-uploaded, {fixed_dims} dim-fixed")
|
||||||
if not to_fix_dims:
|
|
||||||
print("\n All selected chapters are complete.")
|
|
||||||
return
|
|
||||||
|
|
||||||
print(f"\n {len(to_reupload)} chapter(s) need re-upload")
|
|
||||||
|
|
||||||
for ch_id, ch_num, ch_title, ch, images in to_reupload:
|
|
||||||
if esc.stop.is_set():
|
|
||||||
break
|
|
||||||
print(f"\n Re-uploading [{ch_num}] {ch_title}")
|
|
||||||
page_bytes = fetch_all_pages(session.page, images)
|
|
||||||
if len(page_bytes) < len(images):
|
|
||||||
missing = [pn for pn in range(1, len(images) + 1) if pn not in page_bytes]
|
|
||||||
print(f" Could not fetch pages: {missing}, skipping")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Upload to R2 (overwrites existing)
|
|
||||||
def upload_page(args, _slug=slug, _n=ch_num):
|
|
||||||
pn, raw = args
|
|
||||||
r2_key = f"manga/{_slug}/chapters/{_n}/{pn}.webp"
|
|
||||||
w, h, webp = probe_and_webp(io.BytesIO(raw))
|
|
||||||
return pn, upload_to_r2(r2_key, webp), w, h
|
|
||||||
|
|
||||||
page_urls = {}
|
|
||||||
done = 0
|
|
||||||
with ThreadPoolExecutor(max_workers=UPLOAD_WORKERS) as pool:
|
|
||||||
futures = {pool.submit(upload_page, (pn, raw)): pn for pn, raw in page_bytes.items()}
|
|
||||||
for future in as_completed(futures):
|
|
||||||
pn, r2_url, w, h = future.result()
|
|
||||||
page_urls[pn] = (r2_url, w, h)
|
|
||||||
done += 1
|
|
||||||
print(f" R2: {done}/{len(page_bytes)}", end="\r")
|
|
||||||
|
|
||||||
# Replace Page records
|
|
||||||
cur2.execute('DELETE FROM "Page" WHERE "chapterId" = %s', (ch_id,))
|
|
||||||
insert_pages(cur2, ch_id, page_urls)
|
|
||||||
conn.commit()
|
|
||||||
print(f" {len(page_urls)} pages restored" + " " * 20)
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with_browser(run)
|
with_browser(run)
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user