// cm_whatsapp_bot_v1/apps/bot/src/scheduler/sweep-stale-runs.ts

import { sql } from "drizzle-orm";
import { db } from "../db.js";
import { logger } from "../logger.js";

/**
 * Recover run rows that were orphaned by a bot crash or restart mid-run.
 *
 * fire-reminder writes the run row with status='pending' UP FRONT so
 * the Activity tab can show progress mid-run, then flips to a
 * terminal status (success/partial/failed/paused/skipped) once it's
 * done. If the bot dies between those two writes, the row sits at
 * 'pending' forever — pg-boss already marked the job 'completed', so
 * it won't retry.
 *
 * This sweep runs at bot startup. It finds any 'pending' run whose
 * `fired_at` is older than `maxAgeMs` (default 5 minutes — enough slack
 * that a run still legitimately in flight on another worker isn't
 * accidentally killed) and:
 *
 *   • Flips the run to 'failed' with a clear error_summary so the UI
 *     stops showing it as in-flight.
 *   • Flips its pending run_target rows to 'skipped' with the same
 *     reason so per-group counts make sense.
 *
 * Both updates are issued as ONE SQL statement (data-modifying CTEs),
 * so they commit or roll back together. If they were two separate
 * statements, a crash between them would leave targets stuck at
 * 'pending' under a run that is already 'failed' — and no later sweep
 * would ever pick them up, because the sweep only reaches targets
 * through runs that are still 'pending'.
 *
 * Does NOT touch the parent reminder's lifecycle status — the row was
 * 'active' when the run started and stays that way; the next
 * occurrence (cron) or operator action will fire a fresh run.
 *
 * @param maxAgeMs Minimum age, in milliseconds, a 'pending' run must
 *   have before it is considered stale. Must be finite and >= 0.
 * @returns How many run rows were flipped to 'failed' and how many
 *   target rows were flipped to 'skipped'.
 * @throws RangeError if `maxAgeMs` is NaN, negative, or so large that
 *   the cutoff is not a representable date. Thrown before any query runs.
 */
export async function sweepStalePendingRuns(
  maxAgeMs: number = 5 * 60 * 1000,
): Promise<{ runs: number; targets: number }> {
  const cutoff = new Date(Date.now() - maxAgeMs);
  // `!(x >= 0)` rejects NaN as well as negatives. A negative age would
  // put the cutoff in the future and sweep runs that are still in flight.
  // The getTime() check catches Infinity / overflow (Invalid Date).
  if (!(maxAgeMs >= 0) || Number.isNaN(cutoff.getTime())) {
    throw new RangeError(
      `sweep-stale-runs: maxAgeMs must be a finite, non-negative number (got ${maxAgeMs})`,
    );
  }

  // Single statement: the target update reads the swept run ids straight
  // from the first CTE's RETURNING output, so there is no unbounded
  // `IN ($1, $2, …)` parameter list and no window in which runs are
  // failed but their targets are still pending.
  const result = await db.execute(sql`
    WITH swept_runs AS (
      UPDATE reminder_runs
      SET status = 'failed',
          error_summary = 'Bot restarted before this run completed.'
      WHERE status = 'pending'
        AND fired_at < ${cutoff}
      RETURNING id
    ),
    swept_targets AS (
      UPDATE reminder_run_targets
      SET status = 'skipped',
          error = 'bot restarted before this group could be sent'
      WHERE status = 'pending'
        AND run_id IN (SELECT id FROM swept_runs)
      RETURNING id
    )
    SELECT
      (SELECT count(*)::int FROM swept_runs) AS runs,
      (SELECT count(*)::int FROM swept_targets) AS targets
  `);

  // The aggregate SELECT always yields one row; the fallbacks only guard
  // against a driver returning nothing. Number() normalises drivers that
  // hand integer columns back as strings.
  const [counts] = result.rows as Array<{
    runs: number | string;
    targets: number | string;
  }>;
  const runCount = Number(counts?.runs ?? 0);
  const targetCount = Number(counts?.targets ?? 0);

  if (runCount === 0) {
    logger.info("sweep-stale-runs: no stale pending runs");
    return { runs: 0, targets: 0 };
  }

  logger.warn(
    { runs: runCount, targets: targetCount, cutoff: cutoff.toISOString() },
    "sweep-stale-runs: cleared stale pending runs",
  );
  return { runs: runCount, targets: targetCount };
}