fix(unpair): stop session manager from racing the web's status write
Symptom
-------
Click "Unpair" on a connected account. The web action sets
\`status='unpaired'\`, but the account detail page often still shows
"Disconnected" — and on accounts that had been previously connected,
the QR pair flow restarts a few seconds later all on its own.
Cause
-----
Two races inside the session manager:
1. The web's \`unpairAccountAction\` notifies the bot via \`pg_notify\`
and then writes \`status='unpaired'\` to the row. The bot's
\`handleUnpair\` calls \`sessionManager.stop()\` which closes the
Baileys socket; Baileys eventually fires a \`connection: close\`
event which the manager's \`handleEvent\` translates into a
\`status='disconnected'\` UPDATE. Whichever write lands second wins.
The user clicks Unpair and sees Disconnected.
2. The same close-event handler schedules a 5-second
\`stop().then(start())\` reconnect for accounts whose
\`lastConnectedAt\` is set. Five seconds after unpair, the bot
silently re-opens the socket, the row flips to \`pending\`, and the
QR carousel restarts.
Fix
---
\`stop(accountId, { intentional: true })\` marks the account in a new
\`intentionalStops\` Set. When the close event lands, \`handleEvent\`
drains the flag (with \`Set.delete()\` returning whether the key was
present, so it's exactly-once and a stale flag can't bleed into a
later session) and skips both the DB UPDATE and the reconnect
schedule. The caller — only \`handleUnpair\` for now — is the one
choosing the row's next state, so we step out of its way.
The flag is set ONLY when callers ask for it. Internal recoveries
(restartRequired auto re-open, ephemeral-close back-off) keep the
default behaviour and continue to write \`disconnected\` + reschedule.
Drive-bys
---------
- Refresh the stale "the row is gone by the time we run" comment in
unpair-handler — the row stays alive now (the operator can re-pair
without retyping the label). Look up the account first so the
audit log carries the real \`operatorId\` instead of \`null\`. The
delete-account flow really does delete the row before notifying us;
the lookup tolerates that and falls back to \`null\`.
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
7b991a565d
commit
731d6d66a6
@ -8,17 +8,26 @@ import { pgNotifyWeb } from "./notify.js";
|
||||
import { logger } from "../logger.js";
|
||||
|
||||
/**
|
||||
* Unpair handler: stop the live Baileys session and remove session files.
|
||||
* The web's unpair action deletes the account row before notifying us,
|
||||
* so we expect the row to be gone by the time we run. Audit log uses a
|
||||
* null operator since the row is no longer queryable.
|
||||
* Unpair handler: stop the live Baileys session and remove the on-disk
|
||||
* session files. The web action keeps the account row alive (status =
|
||||
* 'unpaired') so the operator can re-pair without retyping the label;
|
||||
* the {intentional: true} stop tells the session manager not to race
|
||||
* the web's status write with its own "disconnected" update or
|
||||
* schedule a reconnect for a session we just chose to tear down.
|
||||
*
|
||||
* For the delete-account flow the row IS gone by the time we run;
|
||||
* the audit log lookup tolerates that.
|
||||
*/
|
||||
export async function handleUnpair(accountId: string): Promise<void> {
|
||||
await sessionManager.stop(accountId);
|
||||
await sessionManager.stop(accountId, { intentional: true });
|
||||
await rm(join(env.SESSIONS_DIR, accountId), { recursive: true, force: true });
|
||||
try {
|
||||
const row = await db.query.whatsappAccounts.findFirst({
|
||||
where: (a, { eq }) => eq(a.id, accountId),
|
||||
columns: { operatorId: true },
|
||||
});
|
||||
await writeAuditLog(db, {
|
||||
operatorId: null,
|
||||
operatorId: row?.operatorId ?? null,
|
||||
source: "web",
|
||||
action: "account.unpaired",
|
||||
targetType: "whatsapp_account",
|
||||
|
||||
@ -40,6 +40,13 @@ class SessionManager {
|
||||
private states = new Map<string, SessionState>();
|
||||
private listeners = new Set<SessionListener>();
|
||||
private reconnectTimers = new Map<string, NodeJS.Timeout>();
|
||||
/**
|
||||
* Account IDs whose next close event was triggered by us on purpose
|
||||
* (unpair, delete, app shutdown). When an entry is present, the close
|
||||
* handler skips the DB status write and the auto-reconnect schedule —
|
||||
* the caller has already chosen the row's next state.
|
||||
*/
|
||||
private intentionalStops = new Set<string>();
|
||||
|
||||
on(listener: SessionListener): () => void {
|
||||
this.listeners.add(listener);
|
||||
@ -92,7 +99,10 @@ class SessionManager {
|
||||
this.sessions.set(accountId, session);
|
||||
}
|
||||
|
||||
async stop(accountId: string): Promise<void> {
|
||||
async stop(
|
||||
accountId: string,
|
||||
opts?: { intentional?: boolean },
|
||||
): Promise<void> {
|
||||
const timer = this.reconnectTimers.get(accountId);
|
||||
if (timer) {
|
||||
clearTimeout(timer);
|
||||
@ -100,6 +110,12 @@ class SessionManager {
|
||||
}
|
||||
const session = this.sessions.get(accountId);
|
||||
if (!session) return;
|
||||
if (opts?.intentional) {
|
||||
// Mark the upcoming close event as expected so handleEvent won't
|
||||
// race with the caller's DB write. We only set this when the
|
||||
// caller will manage the row's status themselves (unpair, delete).
|
||||
this.intentionalStops.add(accountId);
|
||||
}
|
||||
await session.close();
|
||||
this.sessions.delete(accountId);
|
||||
}
|
||||
@ -135,43 +151,52 @@ class SessionManager {
|
||||
})
|
||||
.where(eq(whatsappAccounts.id, accountId));
|
||||
} else if (event.type === "close") {
|
||||
// Drain the intentional-stop flag exactly once so a stale flag
|
||||
// can't bleed into a later, unrelated session.
|
||||
const wasIntentional = this.intentionalStops.delete(accountId);
|
||||
this.transition(accountId, { kind: "close", loggedOut: event.loggedOut });
|
||||
await db
|
||||
.update(whatsappAccounts)
|
||||
.set({ status: event.loggedOut ? "logged_out" : "disconnected" })
|
||||
.where(eq(whatsappAccounts.id, accountId));
|
||||
|
||||
if (event.loggedOut) {
|
||||
await this.stop(accountId);
|
||||
} else if (event.restartRequired) {
|
||||
// Status 515 — the post-pair-success reconnect. Always re-open
|
||||
// immediately (no 5 s back-off, no `lastConnectedAt` gate). If
|
||||
// we don't, the auth handshake never completes and the user
|
||||
// sees a spurious "Pairing timed out".
|
||||
const timer = setTimeout(() => {
|
||||
this.reconnectTimers.delete(accountId);
|
||||
void this.stop(accountId).then(() => this.start(accountId));
|
||||
}, 250);
|
||||
this.reconnectTimers.set(accountId, timer);
|
||||
if (wasIntentional) {
|
||||
// Caller (unpair/delete handler) is writing the row themselves.
|
||||
// Don't overwrite their status, and don't schedule a reconnect
|
||||
// for a session we just chose to tear down.
|
||||
} else {
|
||||
// Other ephemeral closes (refs exhausted, network blip): only
|
||||
// auto-reconnect for accounts that have been linked at least
|
||||
// once. During an initial pair attempt this would otherwise
|
||||
// restart the pair dance and rotate the QR every few seconds.
|
||||
const account = await db.query.whatsappAccounts.findFirst({
|
||||
where: (a, { eq }) => eq(a.id, accountId),
|
||||
columns: { lastConnectedAt: true },
|
||||
});
|
||||
if (account?.lastConnectedAt) {
|
||||
await db
|
||||
.update(whatsappAccounts)
|
||||
.set({ status: event.loggedOut ? "logged_out" : "disconnected" })
|
||||
.where(eq(whatsappAccounts.id, accountId));
|
||||
|
||||
if (event.loggedOut) {
|
||||
await this.stop(accountId);
|
||||
} else if (event.restartRequired) {
|
||||
// Status 515 — the post-pair-success reconnect. Always re-open
|
||||
// immediately (no 5 s back-off, no `lastConnectedAt` gate). If
|
||||
// we don't, the auth handshake never completes and the user
|
||||
// sees a spurious "Pairing timed out".
|
||||
const timer = setTimeout(() => {
|
||||
this.reconnectTimers.delete(accountId);
|
||||
void this.stop(accountId).then(() => this.start(accountId));
|
||||
}, 5000);
|
||||
}, 250);
|
||||
this.reconnectTimers.set(accountId, timer);
|
||||
} else {
|
||||
// Brand-new account that hasn't authenticated yet — let the
|
||||
// pair-handler clean up via its timeout.
|
||||
await this.stop(accountId);
|
||||
// Other ephemeral closes (refs exhausted, network blip): only
|
||||
// auto-reconnect for accounts that have been linked at least
|
||||
// once. During an initial pair attempt this would otherwise
|
||||
// restart the pair dance and rotate the QR every few seconds.
|
||||
const account = await db.query.whatsappAccounts.findFirst({
|
||||
where: (a, { eq }) => eq(a.id, accountId),
|
||||
columns: { lastConnectedAt: true },
|
||||
});
|
||||
if (account?.lastConnectedAt) {
|
||||
const timer = setTimeout(() => {
|
||||
this.reconnectTimers.delete(accountId);
|
||||
void this.stop(accountId).then(() => this.start(accountId));
|
||||
}, 5000);
|
||||
this.reconnectTimers.set(accountId, timer);
|
||||
} else {
|
||||
// Brand-new account that hasn't authenticated yet — let the
|
||||
// pair-handler clean up via its timeout.
|
||||
await this.stop(accountId);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (event.type === "qr") {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user