From 4b9514b1f3df254ba3143deeff411cc419530c0e Mon Sep 17 00:00:00 2001 From: James Liounis Date: Tue, 19 May 2026 13:56:38 +0000 Subject: [PATCH] feat: exponential backoff on HTTP 429 (re-impl of #90) Re-implementation of the rate-limit retry portion of #90. Credit: @akshayabogoju-coder for the original proposal. Why --- Today, makeApiRequest fails immediately on HTTP 429. That makes the server flaky under bursts and propagates a hard failure to the MCP client for what is almost always a transient condition. Change ------ makeApiRequest now wraps the underlying HTTP call (extracted as singleApiAttempt) in a retry loop that: - Retries only on HTTP 429 (rate limit). Other 4xx/5xx fail fast, because we can't assume idempotency without operator-controlled idempotency keys. - Uses the schedule [2s, 4s, 8s] by default (3 retries, 4 attempts total). - Respects the upstream Retry-After header when present. The actual wait is max(configured-delay, retry-after-seconds * 1000) so the server never retries faster than the API asks. - Schedule is overridable via PERPLEXITY_RETRY_DELAYS_MS (comma- separated milliseconds) for tests and ops tuning. Scope notes ----------- This PR is intentionally narrower than the original #90: - It does NOT touch HTTP transport / 0.0.0.0 binding / origin controls (those belong in a separate security review). - It does NOT add a README "Professional Contributions" section (out of scope). - It does NOT remove `args: any` casts in tool handlers \u2014 the MCP SDK already enforces the Zod inputSchema before the handler runs, so the cast is cosmetic, not a safety hole. Worth its own cleanup PR if/when the SDK's handler generics stabilize. - The CWE-200 sanitization that #90 also touched is being shipped separately in #107 to keep review scopes clean. Tests ----- - npm test: 82 passed / 82 (was 78 on main). - 4 new tests in a "HTTP 429 retry behavior" suite: * success after N transient 429s * gives up after the configured retry count * does NOT retry non-429 errors (5xx fails fast) * Retry-After header is honored - Tests run with PERPLEXITY_RETRY_DELAYS_MS=0,0,0 to avoid real waits. - npm run build: clean. Co-authored-by: @akshayabogoju-coder --- src/index.test.ts | 119 ++++++++++++++++++++++++++++++++++++++++++++++ src/server.ts | 75 +++++++++++++++++++++++++---- 2 files changed, 185 insertions(+), 9 deletions(-) diff --git a/src/index.test.ts b/src/index.test.ts index 21897b8..9ac0ece 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -176,6 +176,125 @@ describe("Perplexity MCP Server", () => { }); }); + describe("HTTP 429 retry behavior", () => { + // Use zero-delay schedule so tests don't actually wait 2s/4s/8s. + const originalRetryDelays = process.env.PERPLEXITY_RETRY_DELAYS_MS; + + beforeEach(() => { + process.env.PERPLEXITY_RETRY_DELAYS_MS = "0,0,0"; + }); + + afterEach(() => { + if (originalRetryDelays === undefined) { + delete process.env.PERPLEXITY_RETRY_DELAYS_MS; + } else { + process.env.PERPLEXITY_RETRY_DELAYS_MS = originalRetryDelays; + } + }); + + it("should retry on 429 and succeed after rate limit clears", async () => { + let callCount = 0; + global.fetch = vi.fn().mockImplementation(async () => { + callCount++; + if (callCount < 3) { + return { + ok: false, + status: 429, + statusText: "Too Many Requests", + headers: new Headers(), + text: async () => "rate limited", + } as unknown as Response; + } + return { + ok: true, + status: 200, + statusText: "OK", + headers: new Headers(), + json: async () => ({ choices: [{ message: { content: "ok" } }] }), + } as unknown as Response; + }); + + const messages = [{ role: "user", content: "test" }]; + const result = await performChatCompletion(messages); + + expect(result).toBe("ok"); + expect(callCount).toBe(3); // 1 initial + 2 retries + }); + + it("should give up after the configured number of 429 retries", async () => { + let callCount = 0; + global.fetch = vi.fn().mockImplementation(async () => { + callCount++; + return { + ok: false, + status: 429, + statusText: "Too Many Requests", + headers: new Headers(), + text: async () => "rate limited", + } as unknown as Response; + }); + + const messages = [{ role: "user", content: "test" }]; + await expect(performChatCompletion(messages)).rejects.toThrow( + "Perplexity API error: 429 Too Many Requests" + ); + // Default schedule has 3 retries, so 4 total attempts. + expect(callCount).toBe(4); + }); + + it("should not retry on non-429 errors", async () => { + let callCount = 0; + global.fetch = vi.fn().mockImplementation(async () => { + callCount++; + return { + ok: false, + status: 500, + statusText: "Internal Server Error", + headers: new Headers(), + text: async () => "oops", + } as unknown as Response; + }); + + const messages = [{ role: "user", content: "test" }]; + await expect(performChatCompletion(messages)).rejects.toThrow( + "Perplexity API error: 500" + ); + expect(callCount).toBe(1); // no retries for 5xx + }); + + it("should respect a Retry-After header on 429", async () => { + // Force a small but observable delay via Retry-After. + let callCount = 0; + const callTimes: number[] = []; + global.fetch = vi.fn().mockImplementation(async () => { + callTimes.push(Date.now()); + callCount++; + if (callCount < 2) { + return { + ok: false, + status: 429, + statusText: "Too Many Requests", + // 0 means "retry immediately" — cheap, but proves the parsing path runs. + headers: new Headers({ "retry-after": "0" }), + text: async () => "rate limited", + } as unknown as Response; + } + return { + ok: true, + status: 200, + statusText: "OK", + headers: new Headers(), + json: async () => ({ choices: [{ message: { content: "ok" } }] }), + } as unknown as Response; + }); + + const messages = [{ role: "user", content: "test" }]; + const result = await performChatCompletion(messages); + expect(result).toBe("ok"); + expect(callCount).toBe(2); + }); + }); + describe("performSearch", () => { it("should successfully perform search", async () => { const mockResponse = { diff --git a/src/server.ts b/src/server.ts index d649ccb..05191d2 100644 --- a/src/server.ts +++ b/src/server.ts @@ -61,15 +61,33 @@ export function stripThinkingTokens(content: string): string { return content.replace(/[\s\S]*?<\/think>/g, '').trim(); } -async function makeApiRequest( +/** + * Default retry schedule for HTTP 429 (rate limit) responses. + * Overridable for tests via the PERPLEXITY_RETRY_DELAYS_MS env var + * (comma-separated milliseconds, e.g. "0,0,0" to disable real waits). + */ +function getRetryDelaysMs(): number[] { + const raw = process.env.PERPLEXITY_RETRY_DELAYS_MS; + if (raw) { + const parsed = raw + .split(",") + .map(s => parseInt(s.trim(), 10)) + .filter(n => Number.isFinite(n) && n >= 0); + if (parsed.length > 0) return parsed; + } + return [2000, 4000, 8000]; +} + +async function sleep(ms: number): Promise { + if (ms <= 0) return; + await new Promise(resolve => setTimeout(resolve, ms)); +} + +async function singleApiAttempt( endpoint: string, body: Record, serviceOrigin: string | undefined, ): Promise { - if (!PERPLEXITY_API_KEY) { - throw new Error("PERPLEXITY_API_KEY environment variable is required"); - } - // Read timeout fresh each time to respect env var changes const TIMEOUT_MS = parseInt(process.env.PERPLEXITY_TIMEOUT_MS || "300000", 10); @@ -102,20 +120,59 @@ async function makeApiRequest( throw new Error(`Network error while calling Perplexity API: ${error}`); } clearTimeout(timeoutId); + return response; +} + +async function makeApiRequest( + endpoint: string, + body: Record, + serviceOrigin: string | undefined, +): Promise { + if (!PERPLEXITY_API_KEY) { + throw new Error("PERPLEXITY_API_KEY environment variable is required"); + } + + const retryDelays = getRetryDelaysMs(); + let response: Response | undefined; + + // Initial attempt + up to retryDelays.length retries, exclusively for HTTP 429. + // Other status codes (4xx/5xx) fail fast — retrying them is not safe without + // operator-controlled idempotency keys, and Perplexity does not currently + // signal retry-safe 5xxs distinctly. + for (let attempt = 0; attempt <= retryDelays.length; attempt++) { + response = await singleApiAttempt(endpoint, body, serviceOrigin); + + if (response.status !== 429) break; + + const isLastAttempt = attempt === retryDelays.length; + if (isLastAttempt) break; + + // Respect server-provided Retry-After (seconds) when present, otherwise + // fall back to the configured backoff schedule. + const retryAfterHeader = response.headers.get("retry-after"); + let waitMs = retryDelays[attempt]; + if (retryAfterHeader) { + const retryAfterSec = parseInt(retryAfterHeader, 10); + if (Number.isFinite(retryAfterSec) && retryAfterSec >= 0) { + waitMs = Math.max(waitMs, retryAfterSec * 1000); + } + } + await sleep(waitMs); + } - if (!response.ok) { + if (!response!.ok) { let errorText; try { - errorText = await response.text(); + errorText = await response!.text(); } catch (parseError) { errorText = "Unable to parse error response"; } throw new Error( - `Perplexity API error: ${response.status} ${response.statusText}\n${errorText}` + `Perplexity API error: ${response!.status} ${response!.statusText}\n${errorText}` ); } - return response; + return response!; } export async function consumeSSEStream(response: Response): Promise {