/**
 * Backfill `width` and `height` on Page rows by range-fetching the first
 * 16 KB of each image from R2 and parsing its header with `image-size`.
 *
 * Idempotent: only targets rows where width=0 or height=0.
 *
 * Required environment: R2_BUCKET, R2_ACCOUNT_ID, R2_ACCESS_KEY, R2_SECRET_KEY.
 *
 * Usage: npx tsx scripts/backfill-page-dims.ts
 */
import { PrismaClient } from "@prisma/client";
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";
import { imageSize } from "image-size";
import { keyFromPublicUrl } from "@/lib/r2";

/**
 * Read a required environment variable.
 *
 * @param name - Name of the variable to look up in `process.env`.
 * @returns The variable's non-empty value.
 * @throws Error if the variable is unset or empty, so a misconfigured run
 *   stops before any request is made instead of failing once per page.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) throw new Error(`${name} must be set`);
  return value;
}

const prisma = new PrismaClient();

const BUCKET = requireEnv("R2_BUCKET");

const s3 = new S3Client({
  region: "auto",
  endpoint: `https://${requireEnv("R2_ACCOUNT_ID")}.r2.cloudflarestorage.com`,
  credentials: {
    accessKeyId: requireEnv("R2_ACCESS_KEY"),
    secretAccessKey: requireEnv("R2_SECRET_KEY"),
  },
});

/** Number of pages probed in parallel per batch. */
const CONCURRENCY = 10;

/**
 * Number of leading bytes requested from each object.
 *
 * NOTE(review): images whose size marker sits beyond this window (e.g. JPEGs
 * with large metadata segments) will presumably fail to parse and be reported
 * as failed — confirm against `image-size` behaviour on truncated input.
 */
const HEADER_BYTES = 16_384;

/**
 * Fetch the first `HEADER_BYTES` bytes of an object from the bucket.
 *
 * @param key - Object key inside `BUCKET`.
 * @returns The bytes returned for the requested range.
 * @throws Error if the response carries no body; errors from the S3 client
 *   propagate unchanged.
 */
async function fetchHeader(key: string): Promise<Uint8Array> {
  const res = await s3.send(
    new GetObjectCommand({
      Bucket: BUCKET,
      Key: key,
      // HTTP byte ranges are inclusive, hence the `- 1`.
      Range: `bytes=0-${HEADER_BYTES - 1}`,
    })
  );
  if (!res.Body) throw new Error(`No body for ${key}`);
  return res.Body.transformToByteArray();
}

/**
 * Probe every Page row with an unset dimension and store the dimensions read
 * from its image header.
 *
 * Rows are processed in batches of `CONCURRENCY`. A failure on one page is
 * logged and counted but does not stop the run. The Prisma connection is
 * released whether or not the run completes.
 */
async function main(): Promise<void> {
  try {
    const pages = await prisma.page.findMany({
      where: { OR: [{ width: 0 }, { height: 0 }] },
      orderBy: { id: "asc" },
      // Only these two columns are read below; skip the rest of the row.
      select: { id: true, imageUrl: true },
    });
    console.log(`Probing ${pages.length} pages with dims unset`);

    let done = 0;
    let failed = 0;

    for (let i = 0; i < pages.length; i += CONCURRENCY) {
      const batch = pages.slice(i, i + CONCURRENCY);
      await Promise.all(
        batch.map(async (page) => {
          try {
            const key = keyFromPublicUrl(page.imageUrl);
            if (!key) {
              throw new Error(`URL outside R2 prefix: ${page.imageUrl}`);
            }
            const header = await fetchHeader(key);
            const { width, height } = imageSize(header);
            // Zero or missing values would leave the row matching the
            // backfill filter, so treat them as a failure.
            if (!width || !height) {
              throw new Error("image-size returned no dimensions");
            }
            await prisma.page.update({
              where: { id: page.id },
              data: { width, height },
            });
            done++;
          } catch (err) {
            failed++;
            console.error(
              `✗ page ${page.id} (${page.imageUrl}):`,
              err instanceof Error ? err.message : err
            );
          }
        })
      );
      console.log(
        `${Math.min(i + CONCURRENCY, pages.length)}/${pages.length}`
      );
    }

    console.log(`\nDone. Probed: ${done}, failed: ${failed}`);
  } finally {
    // Runs on the error path too, before the top-level handler exits.
    await prisma.$disconnect();
  }
}

main().catch((e) => {
  console.error(e);
  process.exit(1);
});