From 0d0aa870b52ab17c77bdaf30bd15e59314843562 Mon Sep 17 00:00:00 2001 From: Flint <263629284+tps-flint@users.noreply.github.com> Date: Sat, 16 May 2026 21:29:55 -0700 Subject: [PATCH] fix(branch): outbox JSON-parse race kills branch daemon (ops-vr39) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The branch daemon's drainOutbox loop races with concurrent queueOutboxMessage writers: writeFileSync creates the destination filename atomically but the file content streams in non-atomically. readdir picks up the new filename mid-write, JSON.parse throws SyntaxError on empty/partial content, and the unhandled exception takes the whole daemon down. Real example: tps-reed branch office on 2026-05-16T17:17Z. Reed-VM mail watcher and a manual tps-mail-send fired ~2 minutes apart; one of them caught the daemon mid-readdir. Daemon process exit, branch office offline silently, two outbound messages stranded in outbox/new/ until manual `tps branch start` restart drained them. Fix: 1. queueOutboxMessage: atomic write via dot-prefixed tmp file in the same directory, then rename. drainOutbox already filters .endsWith(".json"), we extend that filter to also exclude .startsWith(".") so dot-tmp files are invisible to the reader. POSIX rename(2) within the same filesystem is atomic — the file appears in the new/ listing only once it's complete. 2. drainOutbox: defense-in-depth try/catch around JSON.parse. A bad file (manual edit, mid-write before rename, prior crash) gets logged and quarantined to sent/.malformed-, the loop continues, the daemon stays up. Pre-fix this was an uncaught throw — branch process exit. Tests: - New: dot-prefixed tmp files are ignored by drainOutbox. - New: malformed JSON is quarantined, not thrown; remaining good files drain. - New: queueOutboxMessage leaves no stray dot-tmp after success. - Existing 2 outbox tests still pass. Filed as ops-vr39 (P2). Branch daemon offline silently is the more dangerous failure mode; this closes that class. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.7 --- packages/cli/src/utils/outbox.ts | 37 ++++++++++++++++++++++++++------ packages/cli/test/outbox.test.ts | 37 +++++++++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 8 deletions(-) diff --git a/packages/cli/src/utils/outbox.ts b/packages/cli/src/utils/outbox.ts index 88a54ed..0fd9c32 100644 --- a/packages/cli/src/utils/outbox.ts +++ b/packages/cli/src/utils/outbox.ts @@ -1,4 +1,4 @@ -import { mkdirSync, readFileSync, readdirSync, renameSync, writeFileSync } from "node:fs"; +import { mkdirSync, readFileSync, readdirSync, renameSync, unlinkSync, writeFileSync } from "node:fs"; import { randomUUID } from "node:crypto"; import { join } from "node:path"; import { homedir } from "node:os"; @@ -21,7 +21,14 @@ export function queueOutboxMessage(to: string, body: string, from: string): void const id = randomUUID(); const timestamp = new Date().toISOString(); const filename = `${timestamp.replace(/[:.]/g, "-")}-${id}.json`; - writeFileSync(join(dir, filename), JSON.stringify({ id, to, from, body, timestamp }, null, 2), "utf-8"); + const content = JSON.stringify({ id, to, from, body, timestamp }, null, 2); + // Atomic write: stage to a dot-prefixed tmp file in the same directory, then + // rename into place. drainOutbox filters out dot-prefixed files so a reader + // running concurrently never sees a half-written file. rename(2) within the + // same filesystem is atomic on POSIX. + const tmp = join(dir, `.${filename}.tmp`); + writeFileSync(tmp, content, "utf-8"); + renameSync(tmp, join(dir, filename)); } export function drainOutbox(): OutboxMessage[] { @@ -30,11 +37,27 @@ export function drainOutbox(): OutboxMessage[] { mkdirSync(newDir, { recursive: true }); mkdirSync(sentDir, { recursive: true }); - const files = readdirSync(newDir).filter((f) => f.endsWith(".json")); - return files.map((f) => { + const files = readdirSync(newDir).filter((f) => f.endsWith(".json") && !f.startsWith(".")); + const out: OutboxMessage[] = []; + for (const f of files) { const src = join(newDir, f); - const msg = JSON.parse(readFileSync(src, "utf-8")) as OutboxMessage; + let msg: OutboxMessage; + try { + msg = JSON.parse(readFileSync(src, "utf-8")) as OutboxMessage; + } catch (err) { + // Defense-in-depth: even with atomic writes, a partial file could appear + // (manual edit, crash mid-write before rename). Don't take the whole + // daemon down — log and quarantine the bad file. + console.error(`drainOutbox: failed to parse ${f}: ${(err as Error).message}; quarantining`); + try { + renameSync(src, join(sentDir, `.malformed-${f}`)); + } catch { + try { unlinkSync(src); } catch {} + } + continue; + } renameSync(src, join(sentDir, f)); - return msg; - }); + out.push(msg); + } + return out; } diff --git a/packages/cli/test/outbox.test.ts b/packages/cli/test/outbox.test.ts index c98a660..6591b2b 100644 --- a/packages/cli/test/outbox.test.ts +++ b/packages/cli/test/outbox.test.ts @@ -1,5 +1,5 @@ import { describe, test, expect, beforeEach, afterEach } from "bun:test"; -import { mkdtempSync, rmSync, readdirSync } from "node:fs"; +import { mkdtempSync, rmSync, readdirSync, writeFileSync, existsSync } from "node:fs"; import { join } from "node:path"; import { tmpdir } from "node:os"; import { queueOutboxMessage, drainOutbox } from "../src/utils/outbox.js"; @@ -23,6 +23,12 @@ describe("outbox", () => { expect(files.length).toBe(1); }); + test("queueOutboxMessage leaves no dot-prefixed tmp files after a successful write", () => { + queueOutboxMessage("host", "hello", "austin"); + const all = readdirSync(join(root, ".tps", "outbox", "new")); + expect(all.every((f) => !f.startsWith("."))).toBe(true); + }); + test("drainOutbox returns messages and moves files to sent", () => { queueOutboxMessage("host", "hello", "austin"); const rows = drainOutbox(); @@ -34,4 +40,33 @@ describe("outbox", () => { expect(newFiles.length).toBe(0); expect(sentFiles.length).toBe(1); }); + + test("drainOutbox ignores dot-prefixed in-flight tmp files (atomic-write race guard)", () => { + // Simulate a writer that's mid-write: a dot-tmp file exists but the + // final rename hasn't happened yet. drainOutbox must not try to parse it. + const newDir = join(root, ".tps", "outbox", "new"); + queueOutboxMessage("host", "hello", "austin"); + writeFileSync(join(newDir, ".pending.json.tmp"), "{ partial", "utf-8"); + const rows = drainOutbox(); + expect(rows.length).toBe(1); + expect(rows[0]?.body).toBe("hello"); + // The in-flight tmp file is untouched + expect(existsSync(join(newDir, ".pending.json.tmp"))).toBe(true); + }); + + test("drainOutbox quarantines malformed JSON without throwing (defense in depth)", () => { + // Inject a fully-published-but-corrupt file (not dot-prefixed) and prove + // the daemon-equivalent loop doesn't crash. Pre-fix, this threw SyntaxError + // and killed the branch daemon on tps-reed 2026-05-16T17:17Z. + const newDir = join(root, ".tps", "outbox", "new"); + const sentDir = join(root, ".tps", "outbox", "sent"); + queueOutboxMessage("host", "good", "austin"); + writeFileSync(join(newDir, "2026-05-16-corrupt.json"), "", "utf-8"); + const rows = drainOutbox(); + expect(rows.length).toBe(1); + expect(rows[0]?.body).toBe("good"); + expect(readdirSync(newDir).length).toBe(0); + // Bad file lands in sent/ with a .malformed- prefix for forensics + expect(readdirSync(sentDir).some((f) => f.startsWith(".malformed-"))).toBe(true); + }); });