fix(bot): treat post-pair "restart required" close as success, not timeout

Found in the live bot log: after the user scans the QR code, Baileys
receives `pair-success`, logs "pairing configured successfully, expect
to restart the connection...", and then closes the websocket with
status 515 (DisconnectReason.restartRequired) so it can reopen with
the new credentials. The next `open` event finishes the pairing.

The previous code path treated ANY close during pairing as a failure:
it parked the row as `unpaired`, wiped the QR, and emitted
session.timeout to the UI. The user was greeted with "Pairing timed
out — The QR window closed before a device was linked" at the exact
moment they had successfully paired.

Three changes:

- session.ts emits `restartRequired: boolean` on the SessionEvent close
  payload (true when reason === DisconnectReason.restartRequired).
- pair-handler treats the restart-required close as a no-op: keeps the
  listener attached and the DB row in `pending` so the upcoming `open`
  event flips it to `connected`.
- session-manager always reconnects on restart-required (250 ms after
  the close — no `lastConnectedAt` gate, no 5 s back-off).

The pure helpers in `pair-state.ts` were updated to model the new branch:
- decideOnPairClose returns null when restartRequired and not loggedOut
  (don't touch DB).
- shouldAutoReconnect returns true on restartRequired regardless of
  whether the account has ever connected before (a loggedOut close
  still wins and returns false).

Tests (+1; 26 bot tests, 104 web tests = 130 green):
- pair-state.test.ts gains explicit cases:
  * restart-required close → null
  * shouldAutoReconnect always true on restart-required (incl.
    first-time pair, where hasEverConnected is false — the exact
    case that broke in production).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
yiekheng 2026-05-10 09:45:37 +08:00
parent 34f22a4f24
commit c95b9658d1
5 changed files with 85 additions and 24 deletions

View File

@ -134,8 +134,22 @@ export async function handleStartPairing(accountId: string): Promise<void> {
count: synced, count: synced,
}); });
off(); off();
} else if (event.type === "close" && event.restartRequired) {
// After the user scans, WhatsApp tells Baileys to "restart"
// the connection. The socket closes with status 515 and the
// session-manager will reopen it with the new credentials —
// the next `open` event is what completes the pairing.
// This is NOT a failure: keep the listener attached so we see
// that subsequent `open` event, and don't surface a timeout
// to the UI. The DB row stays in `pending` until `open`.
logger.info(
{ accountId: id },
"pair: restart-required close (post-pair reconnect) — keeping listener alive",
);
// The session-manager handles the actual reconnect; nothing to
// do here other than NOT tear our listener / DB state down.
} else if (event.type === "close") { } else if (event.type === "close") {
// During the pairing window, ANY close means the QR window // During the pairing window, any other close means the QR window
// ended without a successful link — Baileys' default is to // ended without a successful link — Baileys' default is to
// close after exhausting QR refs (~2.5 min). Surface this to // close after exhausting QR refs (~2.5 min). Surface this to
// the UI so the user gets a "pairing timed out" screen, and // the UI so the user gets a "pairing timed out" screen, and

View File

@ -11,11 +11,18 @@ describe("decideOnPairClose", () => {
expect(r).toEqual({ next: "logged_out", clearQrPng: true }); expect(r).toEqual({ next: "logged_out", clearQrPng: true });
}); });
it("restart-required close → null (it's a SUCCESS — reconnect, don't touch DB)", () => {
// Regression we just fixed: after the user scans, Baileys closes
// the socket with status 515 ("restart required") so it can
// reopen with the new credentials. Treating that close as a
// failure produced a spurious "Pairing timed out" right at the
// moment the user actually paired successfully.
expect(
decideOnPairClose({ current: "pending", loggedOut: false, restartRequired: true }),
).toBe(null);
});
it("non-loggedOut close from `pending` parks the row as `unpaired`", () => { it("non-loggedOut close from `pending` parks the row as `unpaired`", () => {
// This is the regression we just fixed: a failed pair (Baileys
// exhausting QR refs, network blip, user closes the page) was
// leaving the row in `pending` forever, which the accounts list
// hid from the operator. It must now settle as `unpaired`.
const r = decideOnPairClose({ current: "pending", loggedOut: false }); const r = decideOnPairClose({ current: "pending", loggedOut: false });
expect(r).toEqual({ next: "unpaired", clearQrPng: true }); expect(r).toEqual({ next: "unpaired", clearQrPng: true });
}); });
@ -23,8 +30,9 @@ describe("decideOnPairClose", () => {
it("non-loggedOut close from any transient state parks as `unpaired`", () => { it("non-loggedOut close from any transient state parks as `unpaired`", () => {
for (const current of ["disconnected", "unpaired", "connected"] as const) { for (const current of ["disconnected", "unpaired", "connected"] as const) {
const r = decideOnPairClose({ current, loggedOut: false }); const r = decideOnPairClose({ current, loggedOut: false });
expect(r.next).toBe("unpaired"); expect(r).not.toBe(null);
expect(r.clearQrPng).toBe(true); expect(r!.next).toBe("unpaired");
expect(r!.clearQrPng).toBe(true);
} }
}); });
}); });
@ -50,13 +58,26 @@ describe("shouldAutoReconnect", () => {
it("never reconnects after a logged-out close", () => { it("never reconnects after a logged-out close", () => {
expect(shouldAutoReconnect({ loggedOut: true, hasEverConnected: true })).toBe(false); expect(shouldAutoReconnect({ loggedOut: true, hasEverConnected: true })).toBe(false);
expect(shouldAutoReconnect({ loggedOut: true, hasEverConnected: false })).toBe(false); expect(shouldAutoReconnect({ loggedOut: true, hasEverConnected: false })).toBe(false);
// Even if Baileys also flagged restartRequired (it shouldn't, but
// be defensive), loggedOut wins.
expect(
shouldAutoReconnect({ loggedOut: true, restartRequired: true, hasEverConnected: true }),
).toBe(false);
}); });
it("reconnects only for accounts that have been linked at least once", () => { it("ALWAYS reconnects on restart-required (post-pair-success), even for first-time accounts", () => {
// Regression guard: we used to auto-reconnect any non-loggedOut // The regression: brand-new pair attempts have hasEverConnected=false,
// close, which during a fresh pair attempt produced a 5-second QR // so the old logic refused to reconnect after status 515 — and the
// refresh loop because Baileys exhausts QR refs every few seconds // user got "Pairing timed out" the moment they actually paired.
// when the user hasn't scanned yet. expect(
shouldAutoReconnect({ loggedOut: false, restartRequired: true, hasEverConnected: false }),
).toBe(true);
expect(
shouldAutoReconnect({ loggedOut: false, restartRequired: true, hasEverConnected: true }),
).toBe(true);
});
it("reconnects only for accounts that have been linked at least once for non-restartRequired drops", () => {
expect(shouldAutoReconnect({ loggedOut: false, hasEverConnected: true })).toBe(true); expect(shouldAutoReconnect({ loggedOut: false, hasEverConnected: true })).toBe(true);
expect(shouldAutoReconnect({ loggedOut: false, hasEverConnected: false })).toBe(false); expect(shouldAutoReconnect({ loggedOut: false, hasEverConnected: false })).toBe(false);
}); });

View File

@ -25,26 +25,35 @@ export interface PairCloseInput {
current: AccountStatus; current: AccountStatus;
/** Did Baileys signal a logged-out close (vs an ephemeral close)? */ /** Did Baileys signal a logged-out close (vs an ephemeral close)? */
loggedOut: boolean; loggedOut: boolean;
/** Was it the post-pair "restart required" close (status 515)? */
restartRequired?: boolean;
} }
export interface StatusUpdate { export type StatusUpdate = {
next: AccountStatus; next: AccountStatus;
/** Wipe the cached QR PNG when the pair window closes. */ /** Wipe the cached QR PNG when the pair window closes. */
clearQrPng: boolean; clearQrPng: boolean;
} } | null;
/** /**
* Decide the status transition when the Baileys session closes during * Decide the status transition when the Baileys session closes during
* a pairing attempt (i.e. before the user has scanned the QR). * a pairing attempt.
* *
* - logged_out close → terminal: `logged_out`. * - logged_out close → terminal: `logged_out`.
* - restart-required close → null (this is a SUCCESS signal that triggers
* a reconnect; the row stays in its current state until `open` fires).
* - ephemeral close (refs exhausted, network blip, etc.) → park as * - ephemeral close (refs exhausted, network blip, etc.) → park as
* `unpaired` so the row stays visible and the user can retry. * `unpaired` so the row stays visible and the user can retry.
*/ */
export function decideOnPairClose({ current, loggedOut }: PairCloseInput): StatusUpdate { export function decideOnPairClose({ current, loggedOut, restartRequired }: PairCloseInput): StatusUpdate {
if (loggedOut) { if (loggedOut) {
return { next: "logged_out", clearQrPng: true }; return { next: "logged_out", clearQrPng: true };
} }
if (restartRequired) {
// Post-pair-success reconnect — the next `open` event finishes the
// job. Don't touch DB state and don't tear the listener down.
return null;
}
// Whatever transient state we were in (most often `pending`), park // Whatever transient state we were in (most often `pending`), park
// the row as `unpaired` — anything else hides it from the operator. // the row as `unpaired` — anything else hides it from the operator.
return { next: "unpaired", clearQrPng: true }; return { next: "unpaired", clearQrPng: true };
@ -53,10 +62,14 @@ export function decideOnPairClose({ current, loggedOut }: PairCloseInput): Statu
/** Whether the session-manager should auto-reconnect after a non-loggedOut close. */ /** Whether the session-manager should auto-reconnect after a non-loggedOut close. */
export function shouldAutoReconnect(args: { export function shouldAutoReconnect(args: {
loggedOut: boolean; loggedOut: boolean;
restartRequired?: boolean;
/** True if the account row has `last_connected_at` set (has been linked before). */ /** True if the account row has `last_connected_at` set (has been linked before). */
hasEverConnected: boolean; hasEverConnected: boolean;
}): boolean { }): boolean {
if (args.loggedOut) return false; if (args.loggedOut) return false;
// Status 515 is the post-pair-success reconnect — always do it,
// regardless of whether the account has ever connected before.
if (args.restartRequired) return true;
return args.hasEverConnected; return args.hasEverConnected;
} }

View File

@ -143,13 +143,21 @@ class SessionManager {
if (event.loggedOut) { if (event.loggedOut) {
await this.stop(accountId); await this.stop(accountId);
} else if (event.restartRequired) {
// Status 515 — the post-pair-success reconnect. Always re-open
// after a short 250 ms delay (no 5 s back-off, no `lastConnectedAt`
// gate). If we don't, the auth handshake never completes and the
// user sees a spurious "Pairing timed out".
const timer = setTimeout(() => {
this.reconnectTimers.delete(accountId);
void this.stop(accountId).then(() => this.start(accountId));
}, 250);
this.reconnectTimers.set(accountId, timer);
} else { } else {
// Only auto-reconnect for accounts that have been linked at least // Other ephemeral closes (refs exhausted, network blip): only
// once — `lastConnectedAt` is set on `open`. During an initial // auto-reconnect for accounts that have been linked at least
// pairing attempt the close event fires every time Baileys // once. During an initial pair attempt this would otherwise
// exhausts QR refs (~every 30s). Reconnecting would restart the // restart the pair dance and rotate the QR every few seconds.
// pair dance and rotate the QR every few seconds — pair-handler
// already manages the pairing window via its own 5-min timeout.
const account = await db.query.whatsappAccounts.findFirst({ const account = await db.query.whatsappAccounts.findFirst({
where: (a, { eq }) => eq(a.id, accountId), where: (a, { eq }) => eq(a.id, accountId),
columns: { lastConnectedAt: true }, columns: { lastConnectedAt: true },

View File

@ -16,7 +16,11 @@ import { syncGroupsForAccount } from "./group-sync.js";
export type SessionEvent = export type SessionEvent =
| { type: "qr"; payload: string } | { type: "qr"; payload: string }
| { type: "open"; phoneNumber: string | undefined } | { type: "open"; phoneNumber: string | undefined }
| { type: "close"; reason: number; loggedOut: boolean }; // `restartRequired` is set when Baileys closes the socket with status
// 515 — the normal post-pair handshake reconnect, NOT a failure.
// pair-handler uses it to skip the "pairing failed" path, and
// session-manager uses it to re-open the socket so the account finishes linking.
| { type: "close"; reason: number; loggedOut: boolean; restartRequired: boolean };
export type SessionEventHandler = (event: SessionEvent) => void | Promise<void>; export type SessionEventHandler = (event: SessionEvent) => void | Promise<void>;
@ -84,7 +88,8 @@ export async function startSession(params: {
const reason = const reason =
(update.lastDisconnect?.error as { output?: { statusCode?: number } } | undefined)?.output?.statusCode ?? 0; (update.lastDisconnect?.error as { output?: { statusCode?: number } } | undefined)?.output?.statusCode ?? 0;
const loggedOut = reason === DisconnectReason.loggedOut; const loggedOut = reason === DisconnectReason.loggedOut;
void onEvent({ type: "close", reason, loggedOut }); const restartRequired = reason === DisconnectReason.restartRequired;
void onEvent({ type: "close", reason, loggedOut, restartRequired });
} }
}); });