git-subtree-dir: manga-site git-subtree-split: f2ef775f7095dc2b107b576cd4053593e89dd887
90 lines
2.5 KiB
TypeScript
90 lines
2.5 KiB
TypeScript
/**
 * Backfill `width` and `height` on Page rows by range-fetching the first
 * 16 KB of each image from R2 and parsing its header with `image-size`.
 *
 * Idempotent: only targets rows where width=0 or height=0.
 *
 * Usage: npx tsx scripts/backfill-page-dims.ts
 */
|
|
import { PrismaClient } from "@prisma/client";
|
|
import { S3Client, GetObjectCommand } from "@aws-sdk/client-s3";
|
|
import { imageSize } from "image-size";
|
|
import { keyFromPublicUrl } from "@/lib/r2";
|
|
|
|
/**
 * Read a required environment variable.
 *
 * @param name - Name of the variable to look up in `process.env`.
 * @returns The variable's value.
 * @throws Error if the variable is unset or empty.
 */
function requireEnv(name: string): string {
  const value = process.env[name];
  if (!value) throw new Error(`${name} must be set`);
  return value;
}

// Prisma client shared by the whole script run.
const prisma = new PrismaClient();

// Every R2 setting is validated at startup so that a missing variable fails
// here with a clear message instead of producing an endpoint that contains
// the literal text "undefined" or a client built with undefined credentials.
const BUCKET = requireEnv("R2_BUCKET");

// S3-compatible client pointed at the account's R2 endpoint.
const s3 = new S3Client({
  region: "auto",
  endpoint: `https://${requireEnv("R2_ACCOUNT_ID")}.r2.cloudflarestorage.com`,
  credentials: {
    accessKeyId: requireEnv("R2_ACCESS_KEY"),
    secretAccessKey: requireEnv("R2_SECRET_KEY"),
  },
});

// Number of pages probed in parallel within one batch.
const CONCURRENCY = 10;
// Number of leading bytes requested from each image (16 KiB).
const HEADER_BYTES = 16_384;
|
|
|
|
/**
 * Download the leading bytes of an object stored in the configured bucket.
 *
 * Sends a ranged GET covering bytes 0 through `HEADER_BYTES - 1` of `key`.
 *
 * @param key - Object key inside the bucket.
 * @returns The response body as a byte array.
 * @throws Error when the response carries no body.
 */
async function fetchHeader(key: string): Promise<Uint8Array> {
  const lastByte = HEADER_BYTES - 1;
  const command = new GetObjectCommand({
    Bucket: BUCKET,
    Key: key,
    Range: `bytes=0-${lastByte}`,
  });

  const { Body: body } = await s3.send(command);
  if (!body) {
    throw new Error(`No body for ${key}`);
  }
  return body.transformToByteArray();
}
|
|
|
|
/**
 * Backfill image dimensions for every Page row whose width or height is 0.
 *
 * Pages are processed in ascending id order, in batches of `CONCURRENCY`
 * that run in parallel. For each page the image key is derived from
 * `imageUrl`, the leading bytes are fetched, the dimensions are parsed with
 * `imageSize`, and the row is updated. A failure on one page is logged and
 * counted but does not stop the run; that row is left unchanged.
 *
 * The Prisma connection is released in a `finally` block, so it is closed
 * even when the initial query or an unexpected error rejects.
 *
 * @returns A promise that resolves once all batches have been processed.
 */
async function main(): Promise<void> {
  try {
    const pages = await prisma.page.findMany({
      where: { OR: [{ width: 0 }, { height: 0 }] },
      orderBy: { id: "asc" },
      // Only these two columns are read below; skip loading the rest of the row.
      select: { id: true, imageUrl: true },
    });
    console.log(`Probing ${pages.length} pages with dims unset`);

    let done = 0;
    let failed = 0;

    for (let i = 0; i < pages.length; i += CONCURRENCY) {
      const batch = pages.slice(i, i + CONCURRENCY);
      await Promise.all(
        batch.map(async (page) => {
          try {
            const key = keyFromPublicUrl(page.imageUrl);
            if (!key) throw new Error(`URL outside R2 prefix: ${page.imageUrl}`);

            const header = await fetchHeader(key);
            const { width, height } = imageSize(header);
            // Missing or zero dimensions would leave the row still matching
            // the query filter, so treat them as a failure instead of writing.
            if (!width || !height) {
              throw new Error("image-size returned no dimensions");
            }

            await prisma.page.update({
              where: { id: page.id },
              data: { width, height },
            });
            done++;
          } catch (err) {
            // Per-page errors are reported and counted; the batch continues.
            failed++;
            console.error(
              `✗ page ${page.id} (${page.imageUrl}):`,
              err instanceof Error ? err.message : err
            );
          }
        })
      );
      // Progress line: pages attempted so far out of the total.
      console.log(`${Math.min(i + CONCURRENCY, pages.length)}/${pages.length}`);
    }

    console.log(`\nDone. Probed: ${done}, failed: ${failed}`);
  } finally {
    await prisma.$disconnect();
  }
}
|
|
|
|
// Script entry point: run the backfill, and if it rejects, print the error
// and terminate the process with exit status 1.
const exitWithFailure = (reason: unknown): never => {
  console.error(reason);
  process.exit(1);
};

main().catch(exitWithFailure);
|