diff --git a/src/index.test.ts b/src/index.test.ts index 21897b8..9ac0ece 100644 --- a/src/index.test.ts +++ b/src/index.test.ts @@ -176,6 +176,125 @@ describe("Perplexity MCP Server", () => { }); }); + describe("HTTP 429 retry behavior", () => { + // Use zero-delay schedule so tests don't actually wait 2s/4s/8s. + const originalRetryDelays = process.env.PERPLEXITY_RETRY_DELAYS_MS; + + beforeEach(() => { + process.env.PERPLEXITY_RETRY_DELAYS_MS = "0,0,0"; + }); + + afterEach(() => { + if (originalRetryDelays === undefined) { + delete process.env.PERPLEXITY_RETRY_DELAYS_MS; + } else { + process.env.PERPLEXITY_RETRY_DELAYS_MS = originalRetryDelays; + } + }); + + it("should retry on 429 and succeed after rate limit clears", async () => { + let callCount = 0; + global.fetch = vi.fn().mockImplementation(async () => { + callCount++; + if (callCount < 3) { + return { + ok: false, + status: 429, + statusText: "Too Many Requests", + headers: new Headers(), + text: async () => "rate limited", + } as unknown as Response; + } + return { + ok: true, + status: 200, + statusText: "OK", + headers: new Headers(), + json: async () => ({ choices: [{ message: { content: "ok" } }] }), + } as unknown as Response; + }); + + const messages = [{ role: "user", content: "test" }]; + const result = await performChatCompletion(messages); + + expect(result).toBe("ok"); + expect(callCount).toBe(3); // 1 initial + 2 retries + }); + + it("should give up after the configured number of 429 retries", async () => { + let callCount = 0; + global.fetch = vi.fn().mockImplementation(async () => { + callCount++; + return { + ok: false, + status: 429, + statusText: "Too Many Requests", + headers: new Headers(), + text: async () => "rate limited", + } as unknown as Response; + }); + + const messages = [{ role: "user", content: "test" }]; + await expect(performChatCompletion(messages)).rejects.toThrow( + "Perplexity API error: 429 Too Many Requests" + ); + // Default schedule has 3 retries, so 4 total attempts. + expect(callCount).toBe(4); + }); + + it("should not retry on non-429 errors", async () => { + let callCount = 0; + global.fetch = vi.fn().mockImplementation(async () => { + callCount++; + return { + ok: false, + status: 500, + statusText: "Internal Server Error", + headers: new Headers(), + text: async () => "oops", + } as unknown as Response; + }); + + const messages = [{ role: "user", content: "test" }]; + await expect(performChatCompletion(messages)).rejects.toThrow( + "Perplexity API error: 500" + ); + expect(callCount).toBe(1); // no retries for 5xx + }); + + it("should respect a Retry-After header on 429", async () => { + // Force a small but observable delay via Retry-After. + let callCount = 0; + const callTimes: number[] = []; + global.fetch = vi.fn().mockImplementation(async () => { + callTimes.push(Date.now()); + callCount++; + if (callCount < 2) { + return { + ok: false, + status: 429, + statusText: "Too Many Requests", + // 0 means "retry immediately" — cheap, but proves the parsing path runs. + headers: new Headers({ "retry-after": "0" }), + text: async () => "rate limited", + } as unknown as Response; + } + return { + ok: true, + status: 200, + statusText: "OK", + headers: new Headers(), + json: async () => ({ choices: [{ message: { content: "ok" } }] }), + } as unknown as Response; + }); + + const messages = [{ role: "user", content: "test" }]; + const result = await performChatCompletion(messages); + expect(result).toBe("ok"); + expect(callCount).toBe(2); + }); + }); + describe("performSearch", () => { it("should successfully perform search", async () => { const mockResponse = { diff --git a/src/server.ts b/src/server.ts index d649ccb..05191d2 100644 --- a/src/server.ts +++ b/src/server.ts @@ -61,15 +61,33 @@ export function stripThinkingTokens(content: string): string { return content.replace(/[\s\S]*?<\/think>/g, '').trim(); } -async function makeApiRequest( +/** + * Default retry schedule for HTTP 429 (rate limit) responses. + * Overridable for tests via the PERPLEXITY_RETRY_DELAYS_MS env var + * (comma-separated milliseconds, e.g. "0,0,0" to disable real waits). + */ +function getRetryDelaysMs(): number[] { + const raw = process.env.PERPLEXITY_RETRY_DELAYS_MS; + if (raw) { + const parsed = raw + .split(",") + .map(s => parseInt(s.trim(), 10)) + .filter(n => Number.isFinite(n) && n >= 0); + if (parsed.length > 0) return parsed; + } + return [2000, 4000, 8000]; +} + +async function sleep(ms: number): Promise { + if (ms <= 0) return; + await new Promise(resolve => setTimeout(resolve, ms)); +} + +async function singleApiAttempt( endpoint: string, body: Record, serviceOrigin: string | undefined, ): Promise { - if (!PERPLEXITY_API_KEY) { - throw new Error("PERPLEXITY_API_KEY environment variable is required"); - } - // Read timeout fresh each time to respect env var changes const TIMEOUT_MS = parseInt(process.env.PERPLEXITY_TIMEOUT_MS || "300000", 10); @@ -102,20 +120,59 @@ async function makeApiRequest( throw new Error(`Network error while calling Perplexity API: ${error}`); } clearTimeout(timeoutId); + return response; +} + +async function makeApiRequest( + endpoint: string, + body: Record, + serviceOrigin: string | undefined, +): Promise { + if (!PERPLEXITY_API_KEY) { + throw new Error("PERPLEXITY_API_KEY environment variable is required"); + } + + const retryDelays = getRetryDelaysMs(); + let response: Response | undefined; + + // Initial attempt + up to retryDelays.length retries, exclusively for HTTP 429. + // Other status codes (4xx/5xx) fail fast — retrying them is not safe without + // operator-controlled idempotency keys, and Perplexity does not currently + // signal retry-safe 5xxs distinctly. + for (let attempt = 0; attempt <= retryDelays.length; attempt++) { + response = await singleApiAttempt(endpoint, body, serviceOrigin); + + if (response.status !== 429) break; + + const isLastAttempt = attempt === retryDelays.length; + if (isLastAttempt) break; + + // Respect server-provided Retry-After (seconds) when present, otherwise + // fall back to the configured backoff schedule. + const retryAfterHeader = response.headers.get("retry-after"); + let waitMs = retryDelays[attempt]; + if (retryAfterHeader) { + const retryAfterSec = parseInt(retryAfterHeader, 10); + if (Number.isFinite(retryAfterSec) && retryAfterSec >= 0) { + waitMs = Math.max(waitMs, retryAfterSec * 1000); + } + } + await sleep(waitMs); + } - if (!response.ok) { + if (!response!.ok) { let errorText; try { - errorText = await response.text(); + errorText = await response!.text(); } catch (parseError) { errorText = "Unable to parse error response"; } throw new Error( - `Perplexity API error: ${response.status} ${response.statusText}\n${errorText}` + `Perplexity API error: ${response!.status} ${response!.statusText}\n${errorText}` ); } - return response; + return response!; } export async function consumeSSEStream(response: Response): Promise {