import { sql } from "drizzle-orm";
import { db } from "../db.js";
import { logger } from "../logger.js";

/**
 * Recover from "bot crashed / restarted mid-run" crashes.
 *
 * fire-reminder writes the run row with status='pending' UP FRONT so
 * the Activity tab can show progress mid-run, then flips to a
 * terminal status (success/partial/failed/paused/skipped) once it's
 * done. If the bot dies between those two writes, the row sits at
 * 'pending' forever — pg-boss already marked the job 'completed', so
 * it won't retry.
 *
 * This sweep runs at bot startup. It finds any 'pending' run older
 * than `maxAgeMs` (default 5 minutes — enough slack that a real
 * mid-run rebalance to another worker isn't accidentally killed) and:
 *
 *   • Flips the run to 'failed' with a clear error_summary so the UI
 *     stops showing it as in-flight.
 *   • Flips its pending run_target rows to 'skipped' with the same
 *     reason so per-group counts make sense.
 *
 * Both updates run inside a single transaction. The second update only
 * looks at runs returned by the first, so if they were committed
 * separately and the process died in between, the run would already be
 * 'failed' and a later sweep would never revisit its still-'pending'
 * targets.
 *
 * Does NOT touch the parent reminder's lifecycle status — the row was
 * 'active' when the run started and stays that way; the next
 * occurrence (cron) or operator action will fire a fresh run.
 *
 * @param maxAgeMs Minimum age, in milliseconds, a 'pending' run must
 *   have (measured from `fired_at`) before it is treated as stale.
 * @returns How many runs were flipped to 'failed' and how many of
 *   their targets were flipped to 'skipped'.
 */
export async function sweepStalePendingRuns(
  maxAgeMs: number = 5 * 60 * 1000,
): Promise<{ runs: number; targets: number }> {
  const cutoff = new Date(Date.now() - maxAgeMs);

  const swept = await db.transaction(async (tx) => {
    const runs = await tx.execute(sql`
      UPDATE reminder_runs
      SET status = 'failed',
          error_summary = 'Bot restarted before this run completed.'
      WHERE status = 'pending'
        AND fired_at < ${cutoff}
      RETURNING id
    `);
    const runRows = runs.rows as Array<{ id: string }>;

    // Nothing to sweep. Also avoids emitting an empty `IN ()` list below,
    // which is not valid SQL.
    if (runRows.length === 0) {
      return { runs: 0, targets: 0 };
    }

    const ids = runRows.map((r) => r.id);
    const targets = await tx.execute(sql`
      UPDATE reminder_run_targets
      SET status = 'skipped',
          error = 'bot restarted before this group could be sent'
      WHERE status = 'pending'
        AND run_id IN (${sql.join(ids.map((id) => sql`${id}`), sql`, `)})
      RETURNING id
    `);

    // Only the row count is needed, so no cast on `rows` is required.
    return { runs: runRows.length, targets: targets.rows.length };
  });

  // Log after the transaction has committed so the message reflects
  // what is actually persisted.
  if (swept.runs === 0) {
    logger.info("sweep-stale-runs: no stale pending runs");
    return swept;
  }

  logger.warn(
    { runs: swept.runs, targets: swept.targets, cutoff: cutoff.toISOString() },
    "sweep-stale-runs: cleared stale pending runs",
  );
  return swept;
}