Scheduled reminder for May 10 8:20 PM never fired. Bot logs showed
"reminder.fire: scheduled" with jobId: null at 12:18 UTC — pg-boss
returned null because the queue was on policy=stately, which dedupes
sends by singletonKey across the created/active/retry states.
A previous schedule for the same reminder (next recurring fire,
created earlier) was still in 'created' state, so the new send for
today 8:20 PM hit the dedupe and was silently rejected.
Two fixes:
1. Switch the queue policy back to 'standard' (the default) and
force-flip any existing 'stately' queue row on boot. Standard
lets us enqueue across reschedules.
2. scheduleReminderFire now does a pre-send cancel: any 'created'
job for this singletonKey is moved to 'cancelled' before the new
boss.send. The new schedule wins; old stale jobs are tombstoned
so the recurring/edit path produces exactly-one upcoming fire.
Duplicate-fire safety (the 'qwerd msg three times' bug) is already
covered at the handler level by the inner-mutex recent-run check
inside fireReminderInner — that's what stately was guarding against,
and the inner check works under standard too.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
140 lines
5.3 KiB
TypeScript
140 lines
5.3 KiB
TypeScript
import type { PgBoss } from "pg-boss";
|
|
import { sql } from "drizzle-orm";
|
|
import { logger } from "../logger.js";
|
|
import { env } from "../env.js";
|
|
import { db } from "../db.js";
|
|
import { fireReminder, type FireReminderPayload } from "./fire-reminder.js";
|
|
|
|
export const REMINDER_FIRE_QUEUE = "reminder.fire";
|
|
|
|
/**
 * Creates the reminder-fire queue (policy 'standard'), force-flips any
 * pre-existing queue row back to 'standard', and registers the worker that
 * hands each job's payload to `fireReminder`.
 *
 * @param boss - pg-boss instance used to create the queue and register the worker.
 * @returns Resolves once the worker has been registered.
 */
export async function registerReminderJobs(boss: PgBoss): Promise<void> {
  // 'standard' (the default) lets us enqueue a new fire even when an
  // older one for the same singletonKey is still 'created'. We need
  // that for the recurring/edit path: when a reminder is rescheduled,
  // scheduleReminderFire() first cancels the stale 'created' job for
  // this reminder and then sends a new one — under 'stately' the
  // SECOND send returns null (it dedupes against the first across
  // states), so a reschedule silently dropped the new fire and the
  // reminder never fired at the new time. Duplicate-fire safety is
  // covered at the handler level by the inner-mutex recent-run check
  // in fire-reminder.ts (see DUPLICATE_FIRE_WINDOW_MS), which catches
  // the microsecond-spaced send case 'stately' was supposed to guard.
  await boss.createQueue(REMINDER_FIRE_QUEUE, { policy: "standard" });

  // pg-boss v12's createQueue is idempotent and DOES NOT update the
  // policy on an existing queue row. Earlier deployments forced
  // policy='stately' here, which broke reschedules. Force-flip back to
  // 'standard' on every boot so an old queue row doesn't strand us.
  try {
    await db.execute(
      sql`UPDATE pgboss.queue SET policy = 'standard' WHERE name = ${REMINDER_FIRE_QUEUE} AND policy <> 'standard'`,
    );
  } catch (err) {
    // Best-effort: a failed flip must not prevent the worker from starting.
    logger.warn(
      { err },
      "reminder.fire: failed to force queue policy=standard (handler-level dedupe still applies)",
    );
  }

  await boss.work<FireReminderPayload>(
    REMINDER_FIRE_QUEUE,
    {
      // Up to BOT_FIRE_CONCURRENCY workers per node, each polling and
      // processing independently. Combined with the per-account mutex
      // inside fireReminder, this lets reminders on DIFFERENT accounts
      // run in parallel while same-account reminders take turns.
      localConcurrency: env.BOT_FIRE_CONCURRENCY,
    },
    async (jobs) => {
      // The handler receives an array of jobs. Fire every entry rather
      // than only the first, so that a batch larger than one can never
      // cause jobs to be acknowledged without their reminder firing.
      // Jobs are processed sequentially; a throw from fireReminder
      // propagates out of the handler exactly as it did before.
      for (const job of jobs) {
        logger.debug({ jobId: job.id, payload: job.data }, "reminder.fire: handling");
        await fireReminder(job.data);
      }
    },
  );

  logger.info(
    { localConcurrency: env.BOT_FIRE_CONCURRENCY },
    "reminder.fire: handler registered",
  );
}
|
|
|
|
/**
 * Schedules the next fire of a reminder, replacing any not-yet-started
 * job previously scheduled for the same reminder.
 *
 * @param boss - pg-boss instance used to send the job.
 * @param reminderId - Id of the reminder; also used to build the singleton key.
 * @param scheduledAt - Earliest time the job may start (passed as `startAfter`).
 * @returns The id returned by `boss.send`, or `null` when pg-boss did not
 *   create a job. A `null` result is logged as a warning.
 */
export async function scheduleReminderFire(
  boss: PgBoss,
  reminderId: string,
  scheduledAt: Date,
): Promise<string | null> {
  const singletonKey = `reminder:${reminderId}`;

  // Replace-then-send. Any 'created' (i.e. not yet started) job for
  // this reminder is the stale next-fire from the previous schedule
  // attempt; nuke it so the new schedule wins. Active/completed jobs
  // are left alone — those represent in-flight or already-fired runs
  // and the handler-level dedupe handles overlap.
  try {
    const cancelled = await db.execute(
      sql`UPDATE pgboss.job
          SET state = 'cancelled', completed_on = now()
          WHERE name = ${REMINDER_FIRE_QUEUE}
            AND singleton_key = ${singletonKey}
            AND state = 'created'
          RETURNING id`,
    );
    if (cancelled.rows.length > 0) {
      logger.info(
        { reminderId, cancelled: cancelled.rows.length },
        "reminder.fire: cancelled stale created jobs before reschedule",
      );
    }
  } catch (err) {
    // If the cancellation step fails, log but still try to send. Worst
    // case we end up with two created jobs and the handler-level
    // recent-run dedupe drops the duplicate fire.
    logger.warn({ err, reminderId }, "reminder.fire: pre-send cancel failed");
  }

  const id = await boss.send(
    REMINDER_FIRE_QUEUE,
    { reminderId },
    {
      startAfter: scheduledAt,
      retryLimit: 3,
      retryDelay: 30,
      retryBackoff: true,
      // Singleton key kept on the job row for diagnostics + the
      // pre-send cancel above, even though 'standard' policy doesn't
      // dedupe by it.
      singletonKey,
    },
  );

  if (id === null) {
    // A null id means no job row was created. Reporting that as
    // "scheduled" at info level hides a reminder that will never fire,
    // so surface it as a warning instead.
    logger.warn(
      { reminderId, jobId: id, scheduledAt },
      "reminder.fire: send returned null, job was NOT scheduled",
    );
  } else {
    logger.info({ reminderId, jobId: id, scheduledAt }, "reminder.fire: scheduled");
  }
  return id;
}
|
|
|
|
/**
 * Re-enqueue a paused run so fire-reminder picks up the still-pending
 * targets. Different singleton key from scheduleReminderFire so the
 * resume doesn't clobber the next-occurrence scheduled job and vice
 * versa.
 *
 * @param boss - pg-boss instance used to send the job.
 * @param reminderId - Id of the reminder the run belongs to.
 * @param runId - Id of the paused run; used to build the singleton key.
 * @returns The id returned by `boss.send`, or `null` when pg-boss did not
 *   create a job. A `null` result is logged as a warning.
 */
export async function enqueueReminderResume(
  boss: PgBoss,
  reminderId: string,
  runId: string,
): Promise<string | null> {
  const id = await boss.send(
    REMINDER_FIRE_QUEUE,
    { reminderId, runId },
    {
      retryLimit: 3,
      retryDelay: 30,
      retryBackoff: true,
      singletonKey: `reminder:resume:${runId}`,
    },
  );

  if (id === null) {
    // No job row was created, so the run would stay paused. Do not
    // report this as a successful enqueue.
    logger.warn(
      { reminderId, runId, jobId: id },
      "reminder.fire: resume send returned null, job was NOT enqueued",
    );
  } else {
    logger.info({ reminderId, runId, jobId: id }, "reminder.fire: resume enqueued");
  }
  return id;
}
|
|
|
|
/**
 * Records a cancel request for a reminder's scheduled fire.
 *
 * This is a soft cancel: no job is touched, only a log line is written.
 *
 * @param _boss - Unused; kept so the signature matches the other queue helpers.
 * @param reminderId - Id of the reminder whose fire should be cancelled.
 */
export async function cancelReminderFire(_boss: PgBoss, reminderId: string): Promise<void> {
  // pg-boss v12 has no clean cancel-by-singleton API, so the scheduled
  // job is left in place. It will still run, but `fireReminder` exits
  // early when the reminder row is gone. A hard cancel could be added
  // later by storing the jobId.
  const logContext = { reminderId };
  logger.info(logContext, "reminder.fire: cancel requested (soft, fizzles on fire)");
}
|