sunnymh/manga-site/scripts/backfill-page-dims.ts

90 lines
2.5 KiB
TypeScript

/**
* Backfill `width` and `height` on Page rows by range-fetching the first
* 16 KiB (16,384 bytes) of each image from R2 and parsing its header with `image-size`.
*
* Idempotent: only targets rows where width=0 or height=0.
*
* Usage: npx tsx scripts/backfill-page-dims.ts
*/
import { PrismaClient } from "@prisma/client";
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";
import { imageSize } from "image-size";
import { keyFromPublicUrl } from "@/lib/r2";
/**
 * Reads a required environment variable, failing fast with a clear message
 * when it is unset or empty.
 *
 * @param name Name of the environment variable.
 * @returns The variable's non-empty value.
 * @throws Error when the variable is missing or empty.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) throw new Error(`${name} must be set`);
  return value;
}

// Database client shared by the whole run.
const prisma = new PrismaClient();

// Every R2 setting is validated up front: a missing account id would otherwise
// produce the endpoint "https://undefined.r2.cloudflarestorage.com", and
// missing credentials would only fail later, at request time.
const BUCKET = requireEnv("R2_BUCKET");
const s3 = new S3Client({
  region: "auto",
  endpoint: `https://${requireEnv("R2_ACCOUNT_ID")}.r2.cloudflarestorage.com`,
  credentials: {
    accessKeyId: requireEnv("R2_ACCESS_KEY"),
    secretAccessKey: requireEnv("R2_SECRET_KEY"),
  },
});

// Number of pages probed in parallel per batch.
const CONCURRENCY = 10;
// Number of leading bytes requested from each image (16 KiB).
const HEADER_BYTES = 16_384;
/**
 * Requests the first HEADER_BYTES bytes of the object stored under `key` in
 * BUCKET with a ranged GET and returns them as a byte array.
 *
 * @param key Object key inside the bucket.
 * @returns The bytes of the response body.
 * @throws Error when the response carries no body.
 */
async function fetchHeader(key: string): Promise<Uint8Array> {
  const lastByte = HEADER_BYTES - 1;
  const command = new GetObjectCommand({
    Bucket: BUCKET,
    Key: key,
    Range: `bytes=0-${lastByte}`,
  });

  const { Body: body } = await s3.send(command);
  if (!body) {
    throw new Error(`No body for ${key}`);
  }
  return body.transformToByteArray();
}
/**
 * Finds every Page row whose width or height is 0, reads the leading bytes of
 * its image, and writes the parsed dimensions back to the row.
 *
 * Rows are processed in batches of CONCURRENCY. A failure on a single page is
 * logged and counted but does not stop the run. The Prisma client is
 * disconnected on every exit path.
 */
async function main(): Promise<void> {
  try {
    const pages = await prisma.page.findMany({
      where: { OR: [{ width: 0 }, { height: 0 }] },
      orderBy: { id: "asc" },
      // Only these two fields are read below; skip loading the other columns.
      select: { id: true, imageUrl: true },
    });
    console.log(`Probing ${pages.length} pages with dims unset`);

    let done = 0;
    let failed = 0;

    for (let i = 0; i < pages.length; i += CONCURRENCY) {
      const batch = pages.slice(i, i + CONCURRENCY);
      await Promise.all(
        batch.map(async (page) => {
          // Errors are caught per page so one bad image cannot reject the
          // whole batch.
          try {
            const key = keyFromPublicUrl(page.imageUrl);
            if (!key) throw new Error(`URL outside R2 prefix: ${page.imageUrl}`);
            const header = await fetchHeader(key);
            const { width, height } = imageSize(header);
            if (!width || !height) {
              throw new Error("image-size returned no dimensions");
            }
            await prisma.page.update({
              where: { id: page.id },
              data: { width, height },
            });
            done++;
          } catch (err) {
            failed++;
            console.error(
              `✗ page ${page.id} (${page.imageUrl}):`,
              err instanceof Error ? err.message : err
            );
          }
        })
      );
      // Progress line: rows attempted so far, clamped on the last batch.
      console.log(`${Math.min(i + CONCURRENCY, pages.length)}/${pages.length}`);
    }

    console.log(`\nDone. Probed: ${done}, failed: ${failed}`);
  } finally {
    // Runs even when the initial query or a batch throws, before the
    // rejection reaches the caller.
    await prisma.$disconnect();
  }
}
// Script entry point: run the backfill; if it rejects, print the error and
// exit with status 1.
main().catch((error: unknown) => {
  console.error(error);
  process.exit(1);
});