From 4b9514b1f3df254ba3143deeff411cc419530c0e Mon Sep 17 00:00:00 2001
From: James Liounis <james.liounis@perplexity.ai>
Date: Tue, 19 May 2026 13:56:38 +0000
Subject: [PATCH] feat: exponential backoff on HTTP 429 (re-impl of #90)

Re-implementation of the rate-limit retry portion of #90.
Credit: @akshayabogoju-coder for the original proposal.

Why
---
Today, makeApiRequest fails immediately on HTTP 429. That makes the
server flaky under bursts and propagates a hard failure to the MCP
client for what is almost always a transient condition.

Change
------
makeApiRequest now wraps the underlying HTTP call (extracted as
singleApiAttempt) in a retry loop that:

  - Retries only on HTTP 429 (rate limit). Other 4xx/5xx fail fast,
    because we can't assume idempotency without operator-controlled
    idempotency keys.
  - Uses the schedule [2s, 4s, 8s] by default (3 retries, 4 attempts
    total).
  - Respects the upstream Retry-After header when present. The actual
    wait is max(configured-delay, retry-after-seconds * 1000) so the
    server never retries faster than the API asks.
  - Schedule is overridable via PERPLEXITY_RETRY_DELAYS_MS (comma-
    separated milliseconds) for tests and ops tuning.

Scope notes
-----------
This PR is intentionally narrower than the original #90:

  - It does NOT touch HTTP transport / 0.0.0.0 binding / origin
    controls (those belong in a separate security review).
  - It does NOT add a README "Professional Contributions" section
    (out of scope).
  - It does NOT remove `args: any` casts in tool handlers \u2014 the MCP
    SDK already enforces the Zod inputSchema before the handler runs,
    so the cast is cosmetic, not a safety hole. Worth its own cleanup
    PR if/when the SDK's handler generics stabilize.
  - The CWE-200 sanitization that #90 also touched is being shipped
    separately in #107 to keep review scopes clean.

Tests
-----
- npm test: 82 passed / 82 (was 78 on main).
- 4 new tests in a "HTTP 429 retry behavior" suite:
    * success after N transient 429s
    * gives up after the configured retry count
    * does NOT retry non-429 errors (5xx fails fast)
    * Retry-After header is honored
- Tests run with PERPLEXITY_RETRY_DELAYS_MS=0,0,0 to avoid real waits.
- npm run build: clean.

Co-authored-by: @akshayabogoju-coder
---
 src/index.test.ts | 119 ++++++++++++++++++++++++++++++++++++++++++++++
 src/server.ts     |  75 +++++++++++++++++++++++++----
 2 files changed, 185 insertions(+), 9 deletions(-)

diff --git a/src/index.test.ts b/src/index.test.ts
index 21897b8..9ac0ece 100644
--- a/src/index.test.ts
+++ b/src/index.test.ts
@@ -176,6 +176,125 @@ describe("Perplexity MCP Server", () => {
     });
   });
 
+  describe("HTTP 429 retry behavior", () => {
+    // Use zero-delay schedule so tests don't actually wait 2s/4s/8s.
+    const originalRetryDelays = process.env.PERPLEXITY_RETRY_DELAYS_MS;
+
+    beforeEach(() => {
+      process.env.PERPLEXITY_RETRY_DELAYS_MS = "0,0,0";
+    });
+
+    afterEach(() => {
+      if (originalRetryDelays === undefined) {
+        delete process.env.PERPLEXITY_RETRY_DELAYS_MS;
+      } else {
+        process.env.PERPLEXITY_RETRY_DELAYS_MS = originalRetryDelays;
+      }
+    });
+
+    it("should retry on 429 and succeed after rate limit clears", async () => {
+      let callCount = 0;
+      global.fetch = vi.fn().mockImplementation(async () => {
+        callCount++;
+        if (callCount < 3) {
+          return {
+            ok: false,
+            status: 429,
+            statusText: "Too Many Requests",
+            headers: new Headers(),
+            text: async () => "rate limited",
+          } as unknown as Response;
+        }
+        return {
+          ok: true,
+          status: 200,
+          statusText: "OK",
+          headers: new Headers(),
+          json: async () => ({ choices: [{ message: { content: "ok" } }] }),
+        } as unknown as Response;
+      });
+
+      const messages = [{ role: "user", content: "test" }];
+      const result = await performChatCompletion(messages);
+
+      expect(result).toBe("ok");
+      expect(callCount).toBe(3); // 1 initial + 2 retries
+    });
+
+    it("should give up after the configured number of 429 retries", async () => {
+      let callCount = 0;
+      global.fetch = vi.fn().mockImplementation(async () => {
+        callCount++;
+        return {
+          ok: false,
+          status: 429,
+          statusText: "Too Many Requests",
+          headers: new Headers(),
+          text: async () => "rate limited",
+        } as unknown as Response;
+      });
+
+      const messages = [{ role: "user", content: "test" }];
+      await expect(performChatCompletion(messages)).rejects.toThrow(
+        "Perplexity API error: 429 Too Many Requests"
+      );
+      // Default schedule has 3 retries, so 4 total attempts.
+      expect(callCount).toBe(4);
+    });
+
+    it("should not retry on non-429 errors", async () => {
+      let callCount = 0;
+      global.fetch = vi.fn().mockImplementation(async () => {
+        callCount++;
+        return {
+          ok: false,
+          status: 500,
+          statusText: "Internal Server Error",
+          headers: new Headers(),
+          text: async () => "oops",
+        } as unknown as Response;
+      });
+
+      const messages = [{ role: "user", content: "test" }];
+      await expect(performChatCompletion(messages)).rejects.toThrow(
+        "Perplexity API error: 500"
+      );
+      expect(callCount).toBe(1); // no retries for 5xx
+    });
+
+    it("should respect a Retry-After header on 429", async () => {
+      // Force a small but observable delay via Retry-After.
+      let callCount = 0;
+      const callTimes: number[] = [];
+      global.fetch = vi.fn().mockImplementation(async () => {
+        callTimes.push(Date.now());
+        callCount++;
+        if (callCount < 2) {
+          return {
+            ok: false,
+            status: 429,
+            statusText: "Too Many Requests",
+            // 0 means "retry immediately" — cheap, but proves the parsing path runs.
+            headers: new Headers({ "retry-after": "0" }),
+            text: async () => "rate limited",
+          } as unknown as Response;
+        }
+        return {
+          ok: true,
+          status: 200,
+          statusText: "OK",
+          headers: new Headers(),
+          json: async () => ({ choices: [{ message: { content: "ok" } }] }),
+        } as unknown as Response;
+      });
+
+      const messages = [{ role: "user", content: "test" }];
+      const result = await performChatCompletion(messages);
+      expect(result).toBe("ok");
+      expect(callCount).toBe(2);
+    });
+  });
+
   describe("performSearch", () => {
     it("should successfully perform search", async () => {
       const mockResponse = {
diff --git a/src/server.ts b/src/server.ts
index d649ccb..05191d2 100644
--- a/src/server.ts
+++ b/src/server.ts
@@ -61,15 +61,33 @@ export function stripThinkingTokens(content: string): string {
   return content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
 }
 
-async function makeApiRequest(
+/**
+ * Default retry schedule for HTTP 429 (rate limit) responses.
+ * Overridable for tests via the PERPLEXITY_RETRY_DELAYS_MS env var
+ * (comma-separated milliseconds, e.g. "0,0,0" to disable real waits).
+ */
+function getRetryDelaysMs(): number[] {
+  const raw = process.env.PERPLEXITY_RETRY_DELAYS_MS;
+  if (raw) {
+    const parsed = raw
+      .split(",")
+      .map(s => parseInt(s.trim(), 10))
+      .filter(n => Number.isFinite(n) && n >= 0);
+    if (parsed.length > 0) return parsed;
+  }
+  return [2000, 4000, 8000];
+}
+
+async function sleep(ms: number): Promise<void> {
+  if (ms <= 0) return;
+  await new Promise(resolve => setTimeout(resolve, ms));
+}
+
+async function singleApiAttempt(
   endpoint: string,
   body: Record<string, unknown>,
   serviceOrigin: string | undefined,
 ): Promise<Response> {
-  if (!PERPLEXITY_API_KEY) {
-    throw new Error("PERPLEXITY_API_KEY environment variable is required");
-  }
-
   // Read timeout fresh each time to respect env var changes
   const TIMEOUT_MS = parseInt(process.env.PERPLEXITY_TIMEOUT_MS || "300000", 10);
 
@@ -102,20 +120,59 @@ async function makeApiRequest(
     throw new Error(`Network error while calling Perplexity API: ${error}`);
   }
   clearTimeout(timeoutId);
+  return response;
+}
+
+async function makeApiRequest(
+  endpoint: string,
+  body: Record<string, unknown>,
+  serviceOrigin: string | undefined,
+): Promise<Response> {
+  if (!PERPLEXITY_API_KEY) {
+    throw new Error("PERPLEXITY_API_KEY environment variable is required");
+  }
+
+  const retryDelays = getRetryDelaysMs();
+  let response: Response | undefined;
+
+  // Initial attempt + up to retryDelays.length retries, exclusively for HTTP 429.
+  // Other status codes (4xx/5xx) fail fast — retrying them is not safe without
+  // operator-controlled idempotency keys, and Perplexity does not currently
+  // signal retry-safe 5xxs distinctly.
+  for (let attempt = 0; attempt <= retryDelays.length; attempt++) {
+    response = await singleApiAttempt(endpoint, body, serviceOrigin);
+
+    if (response.status !== 429) break;
+
+    const isLastAttempt = attempt === retryDelays.length;
+    if (isLastAttempt) break;
+
+    // Respect server-provided Retry-After (seconds) when present, otherwise
+    // fall back to the configured backoff schedule.
+    const retryAfterHeader = response.headers.get("retry-after");
+    let waitMs = retryDelays[attempt];
+    if (retryAfterHeader) {
+      const retryAfterSec = parseInt(retryAfterHeader, 10);
+      if (Number.isFinite(retryAfterSec) && retryAfterSec >= 0) {
+        waitMs = Math.max(waitMs, retryAfterSec * 1000);
+      }
+    }
+    await sleep(waitMs);
+  }
 
-  if (!response.ok) {
+  if (!response!.ok) {
     let errorText;
     try {
-      errorText = await response.text();
+      errorText = await response!.text();
     } catch (parseError) {
       errorText = "Unable to parse error response";
     }
     throw new Error(
-      `Perplexity API error: ${response.status} ${response.statusText}\n${errorText}`
+      `Perplexity API error: ${response!.status} ${response!.statusText}\n${errorText}`
     );
   }
 
-  return response;
+  return response!;
 }
 
 export async function consumeSSEStream(response: Response): Promise<ChatCompletionResponse> {