perplexityai · jliounis · May 19, 2026
diff --git a/src/index.test.ts b/src/index.test.ts
@@ -176,6 +176,118 @@ describe("Perplexity MCP Server", () => {
     });
   });
 
+  describe("HTTP 429 retry behavior", () => {
+    // Use a zero-delay schedule so tests don't actually wait 2s/4s/8s.
+    const originalRetryDelays = process.env.PERPLEXITY_RETRY_DELAYS_MS;
+
+    beforeEach(() => {
+      process.env.PERPLEXITY_RETRY_DELAYS_MS = "0,0,0";
+    });
+
+    afterEach(() => {
+      if (originalRetryDelays === undefined) {
+        delete process.env.PERPLEXITY_RETRY_DELAYS_MS;
+      } else {
+        process.env.PERPLEXITY_RETRY_DELAYS_MS = originalRetryDelays;
+      }
+    });
+
+    /** Builds a minimal stand-in for a failed fetch Response. */
+    const errorResponse = (
+      status: number,
+      statusText: string,
+      body: string,
+      headers: Record<string, string> = {}
+    ): Response =>
+      ({
+        ok: false,
+        status,
+        statusText,
+        headers: new Headers(headers),
+        text: async () => body,
+      }) as unknown as Response;
+
+    /** Builds a minimal stand-in for a successful completion whose content is "ok". */
+    const okResponse = (): Response =>
+      ({
+        ok: true,
+        status: 200,
+        statusText: "OK",
+        headers: new Headers(),
+        json: async () => ({ choices: [{ message: { content: "ok" } }] }),
+      }) as unknown as Response;
+
+    /** Installs a fetch mock that answers 429 for the first `rateLimitedCalls` calls, then 200. */
+    const mockRateLimitedFetch = (
+      rateLimitedCalls: number,
+      headers: Record<string, string> = {}
+    ) => {
+      let served = 0;
+      const fetchMock = vi.fn().mockImplementation(async () =>
+        served++ < rateLimitedCalls
+          ? errorResponse(429, "Too Many Requests", "rate limited", headers)
+          : okResponse()
+      );
+      global.fetch = fetchMock;
+      return fetchMock;
+    };
+
+    it("should retry on 429 and succeed after rate limit clears", async () => {
+      const fetchMock = mockRateLimitedFetch(2);
+
+      const messages = [{ role: "user", content: "test" }];
+      const result = await performChatCompletion(messages);
+
+      expect(result).toBe("ok");
+      expect(fetchMock).toHaveBeenCalledTimes(3); // 1 initial + 2 retries
+    });
+
+    it("should give up after the configured number of 429 retries", async () => {
+      const fetchMock = mockRateLimitedFetch(Infinity);
+
+      const messages = [{ role: "user", content: "test" }];
+      await expect(performChatCompletion(messages)).rejects.toThrow(
+        "Perplexity API error: 429 Too Many Requests"
+      );
+      // The "0,0,0" schedule from beforeEach has 3 entries: 1 initial + 3 retries.
+      expect(fetchMock).toHaveBeenCalledTimes(4);
+    });
+
+    it("should not retry on non-429 errors", async () => {
+      const fetchMock = vi
+        .fn()
+        .mockImplementation(async () => errorResponse(500, "Internal Server Error", "oops"));
+      global.fetch = fetchMock;
+
+      const messages = [{ role: "user", content: "test" }];
+      await expect(performChatCompletion(messages)).rejects.toThrow(
+        "Perplexity API error: 500"
+      );
+      expect(fetchMock).toHaveBeenCalledTimes(1); // no retries for 5xx
+    });
+
+    it("should respect a Retry-After header on 429", async () => {
+      // Retry-After: 1 must win over the zero-delay schedule. Fake timers let the
+      // test observe the 1000 ms wait without actually sleeping.
+      vi.useFakeTimers();
+      try {
+        const fetchMock = mockRateLimitedFetch(1, { "retry-after": "1" });
+
+        const messages = [{ role: "user", content: "test" }];
+        const pending = performChatCompletion(messages);
+
+        await vi.advanceTimersByTimeAsync(999);
+        expect(fetchMock).toHaveBeenCalledTimes(1); // still waiting out Retry-After
+
+        await vi.advanceTimersByTimeAsync(1);
+        expect(await pending).toBe("ok");
+        expect(fetchMock).toHaveBeenCalledTimes(2);
+      } finally {
+        vi.useRealTimers();
+      }
+    });
+  });
+
   describe("performSearch", () => {
     it("should successfully perform search", async () => {
       const mockResponse = {

diff --git a/src/server.ts b/src/server.ts
@@ -61,15 +61,33 @@ export function stripThinkingTokens(content: string): string {
   return content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
 }
 
-async function makeApiRequest(
+/**
+ * Retry schedule for HTTP 429 (rate limit) responses: the milliseconds to wait
+ * before each retry, defaulting to [2000, 4000, 8000].
+ * Overridable for tests via the PERPLEXITY_RETRY_DELAYS_MS env var
+ * (comma-separated milliseconds, e.g. "0,0,0" to disable real waits).
+ * Only entries of 1-9 plain digits are kept, so every delay is finite and within
+ * setTimeout's range; parseInt would have misread "1e3" or "1.5" as 1 ms.
+ */
+function getRetryDelaysMs(): number[] {
+  const raw = process.env.PERPLEXITY_RETRY_DELAYS_MS || "";
+  const parsed = raw
+    .split(",")
+    .filter(s => /^\s*\d{1,9}\s*$/.test(s))
+    .map(Number);
+  return parsed.length > 0 ? parsed : [2000, 4000, 8000];
+}
+
+/** Resolves after `ms` milliseconds; resolves without a timer when `ms` is zero or negative. */
+function sleep(ms: number): Promise<void> {
+  return ms <= 0 ? Promise.resolve() : new Promise<void>(done => setTimeout(done, ms));
+}
+
+async function singleApiAttempt(
   endpoint: string,
   body: Record<string, unknown>,
   serviceOrigin: string | undefined,
 ): Promise<Response> {
-  if (!PERPLEXITY_API_KEY) {
-    throw new Error("PERPLEXITY_API_KEY environment variable is required");
-  }
-
   // Read timeout fresh each time to respect env var changes
   const TIMEOUT_MS = parseInt(process.env.PERPLEXITY_TIMEOUT_MS || "300000", 10);
 
@@ -102,20 +120,59 @@ async function makeApiRequest(
     throw new Error(`Network error while calling Perplexity API: ${error}`);
   }
   clearTimeout(timeoutId);
+  return response;
+}
+
+/**
+ * Calls the Perplexity API via singleApiAttempt, retrying only on HTTP 429.
+ * Makes one initial attempt plus at most one retry per getRetryDelaysMs() entry,
+ * waiting the larger of that entry and the server's Retry-After before each retry.
+ * @returns the first response whose `ok` flag is set.
+ * @throws Error if PERPLEXITY_API_KEY is unset or the final response is not OK.
+ */
+async function makeApiRequest(
+  endpoint: string,
+  body: Record<string, unknown>,
+  serviceOrigin: string | undefined,
+): Promise<Response> {
+  if (!PERPLEXITY_API_KEY) {
+    throw new Error("PERPLEXITY_API_KEY environment variable is required");
+  }
+
+  // Other status codes (4xx/5xx) fail fast — retrying them is not safe without
+  // operator-controlled idempotency keys, and Perplexity does not currently
+  // signal retry-safe 5xxs distinctly.
+  let response = await singleApiAttempt(endpoint, body, serviceOrigin);
+  for (const scheduledMs of getRetryDelaysMs()) {
+    if (response.status !== 429) break;
+
+    // Respect a server-provided Retry-After (seconds) when it exceeds the
+    // configured backoff; absent or non-numeric values keep the schedule.
+    const retryAfterSec = parseInt(response.headers.get("retry-after") ?? "", 10);
+    const requestedMs = Number.isFinite(retryAfterSec) ? retryAfterSec * 1000 : 0;
+    // setTimeout treats delays above 2^31-1 ms as ~0, so clamp instead of
+    // turning a very long Retry-After into an immediate retry.
+    const waitMs = Math.min(Math.max(scheduledMs, requestedMs), 2 ** 31 - 1);
+
+    // Release the unread 429 body so its connection is not held during the wait.
+    await response.body?.cancel().catch(() => undefined);
+    await sleep(waitMs);
+    response = await singleApiAttempt(endpoint, body, serviceOrigin);
+  }
 
   if (!response.ok) {
     let errorText;
     try {
       errorText = await response.text();
     } catch (parseError) {
       errorText = "Unable to parse error response";
     }
     throw new Error(
       `Perplexity API error: ${response.status} ${response.statusText}\n${errorText}`
     );
   }
 
   return response;
 }
 
 export async function consumeSSEStream(response: Response): Promise<ChatCompletionResponse> {