Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
119 changes: 119 additions & 0 deletions src/index.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,125 @@ describe("Perplexity MCP Server", () => {
});
});

describe("HTTP 429 retry behavior", () => {
  // Every test in this suite runs with a three-entry, zero-delay retry
  // schedule so the retry loop never really waits 2s/4s/8s.
  const originalRetryDelays = process.env.PERPLEXITY_RETRY_DELAYS_MS;

  /** Stub of a failed HTTP response exposing only the fields these tests set. */
  const failedResponse = (
    status: number,
    statusText: string,
    bodyText: string,
    headers: Headers = new Headers(),
  ): Response =>
    ({
      ok: false,
      status,
      statusText,
      headers,
      text: async () => bodyText,
    }) as unknown as Response;

  /** Stub of a 429 response, optionally carrying headers such as Retry-After. */
  const rateLimitedResponse = (headers?: Headers): Response =>
    failedResponse(429, "Too Many Requests", "rate limited", headers);

  /** Stub of a successful chat-completion response whose content is "ok". */
  const successResponse = (): Response =>
    ({
      ok: true,
      status: 200,
      statusText: "OK",
      headers: new Headers(),
      json: async () => ({ choices: [{ message: { content: "ok" } }] }),
    }) as unknown as Response;

  beforeEach(() => {
    process.env.PERPLEXITY_RETRY_DELAYS_MS = "0,0,0";
  });

  afterEach(() => {
    // Assigning undefined to process.env would store the string "undefined",
    // so a variable that was originally unset has to be deleted instead.
    if (originalRetryDelays === undefined) {
      delete process.env.PERPLEXITY_RETRY_DELAYS_MS;
    } else {
      process.env.PERPLEXITY_RETRY_DELAYS_MS = originalRetryDelays;
    }
  });

  it("should retry on 429 and succeed after rate limit clears", async () => {
    let callCount = 0;
    global.fetch = vi.fn().mockImplementation(async () => {
      callCount++;
      // The first two calls are rate limited; the third one succeeds.
      return callCount < 3 ? rateLimitedResponse() : successResponse();
    });

    const messages = [{ role: "user", content: "test" }];
    const result = await performChatCompletion(messages);

    expect(result).toBe("ok");
    expect(callCount).toBe(3); // 1 initial + 2 retries
  });

  it("should give up after the configured number of 429 retries", async () => {
    let callCount = 0;
    global.fetch = vi.fn().mockImplementation(async () => {
      callCount++;
      return rateLimitedResponse();
    });

    const messages = [{ role: "user", content: "test" }];
    await expect(performChatCompletion(messages)).rejects.toThrow(
      "Perplexity API error: 429 Too Many Requests"
    );
    // The suite's "0,0,0" schedule allows 3 retries, so 4 total attempts.
    expect(callCount).toBe(4);
  });

  it("should not retry on non-429 errors", async () => {
    let callCount = 0;
    global.fetch = vi.fn().mockImplementation(async () => {
      callCount++;
      return failedResponse(500, "Internal Server Error", "oops");
    });

    const messages = [{ role: "user", content: "test" }];
    await expect(performChatCompletion(messages)).rejects.toThrow(
      "Perplexity API error: 500"
    );
    expect(callCount).toBe(1); // no retries for 5xx
  });

  it("should respect a Retry-After header on 429", async () => {
    // "Retry-After: 0" keeps the test instant while still sending the header
    // through the retry path. Only the attempt count is asserted here, not
    // the length of the wait.
    let callCount = 0;
    global.fetch = vi.fn().mockImplementation(async () => {
      callCount++;
      return callCount < 2
        ? rateLimitedResponse(new Headers({ "retry-after": "0" }))
        : successResponse();
    });

    const messages = [{ role: "user", content: "test" }];
    const result = await performChatCompletion(messages);
    expect(result).toBe("ok");
    expect(callCount).toBe(2);
  });
});

describe("performSearch", () => {
it("should successfully perform search", async () => {
const mockResponse = {
Expand Down
75 changes: 66 additions & 9 deletions src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -61,15 +61,33 @@ export function stripThinkingTokens(content: string): string {
return content.replace(/<think>[\s\S]*?<\/think>/g, '').trim();
}

async function makeApiRequest(
/**
 * Resolves the wait times (in milliseconds) used between HTTP 429 retries.
 *
 * The PERPLEXITY_RETRY_DELAYS_MS env var, when set, is read as a
 * comma-separated list (e.g. "0,0,0" so tests skip real waits). Each entry is
 * trimmed and parsed with base-10 `parseInt`; entries that do not parse to a
 * finite, non-negative number are dropped. If the variable is unset, empty, or
 * yields no usable entry, the built-in 2s/4s/8s schedule is returned.
 *
 * @returns One delay per retry, in the order the retries happen.
 */
function getRetryDelaysMs(): number[] {
  const fallbackSchedule = [2000, 4000, 8000];

  const override = process.env.PERPLEXITY_RETRY_DELAYS_MS;
  if (!override) {
    return fallbackSchedule;
  }

  const delays: number[] = [];
  for (const piece of override.split(",")) {
    const ms = parseInt(piece.trim(), 10);
    if (Number.isFinite(ms) && ms >= 0) {
      delays.push(ms);
    }
  }

  return delays.length > 0 ? delays : fallbackSchedule;
}

/**
 * Pauses the caller for `ms` milliseconds.
 *
 * Zero and negative durations resolve without scheduling a timer.
 *
 * @param ms - How long to wait, in milliseconds.
 */
async function sleep(ms: number): Promise<void> {
  // Written as a negated `<=` so every value the `ms <= 0` guard lets through
  // (including NaN) still reaches setTimeout.
  const needsTimer = !(ms <= 0);
  if (needsTimer) {
    await new Promise((wake) => {
      setTimeout(wake, ms);
    });
  }
}

async function singleApiAttempt(
endpoint: string,
body: Record<string, unknown>,
serviceOrigin: string | undefined,
): Promise<Response> {
if (!PERPLEXITY_API_KEY) {
throw new Error("PERPLEXITY_API_KEY environment variable is required");
}

// Read timeout fresh each time to respect env var changes
const TIMEOUT_MS = parseInt(process.env.PERPLEXITY_TIMEOUT_MS || "300000", 10);

Expand Down Expand Up @@ -102,20 +120,59 @@ async function makeApiRequest(
throw new Error(`Network error while calling Perplexity API: ${error}`);
}
clearTimeout(timeoutId);
return response;
}

/**
 * Sends one API request via `singleApiAttempt`, retrying only on HTTP 429.
 *
 * There is one initial attempt plus at most one retry per entry returned by
 * `getRetryDelaysMs()`. Before each retry the function waits for the larger of
 * the scheduled delay and the response's `Retry-After` value. Only a
 * non-negative integer number of seconds is honoured there; any value that
 * `parseInt` cannot read as such falls back to the scheduled delay alone.
 *
 * Other status codes (4xx/5xx) fail fast — retrying them is not safe without
 * operator-controlled idempotency keys, and Perplexity does not currently
 * signal retry-safe 5xxs distinctly.
 *
 * @param endpoint - Forwarded unchanged to `singleApiAttempt`.
 * @param body - Forwarded unchanged to `singleApiAttempt`.
 * @param serviceOrigin - Forwarded unchanged to `singleApiAttempt`.
 * @returns The first response whose `ok` flag is true.
 * @throws Error if PERPLEXITY_API_KEY is not set, or if the final response is
 *   not ok (the message carries status, status text and response body).
 *   Errors thrown by `singleApiAttempt` propagate unchanged.
 */
async function makeApiRequest(
  endpoint: string,
  body: Record<string, unknown>,
  serviceOrigin: string | undefined,
): Promise<Response> {
  if (!PERPLEXITY_API_KEY) {
    throw new Error("PERPLEXITY_API_KEY environment variable is required");
  }

  const retryDelays = getRetryDelaysMs();

  // Making the first attempt outside the loop keeps `response` always
  // defined, so no non-null assertions are needed further down.
  let response = await singleApiAttempt(endpoint, body, serviceOrigin);

  for (const scheduledDelayMs of retryDelays) {
    if (response.status !== 429) break;

    // Respect server-provided Retry-After (seconds) when present, otherwise
    // fall back to the configured backoff schedule.
    let waitMs = scheduledDelayMs;
    const retryAfterHeader = response.headers.get("retry-after");
    if (retryAfterHeader) {
      const retryAfterSec = parseInt(retryAfterHeader, 10);
      if (Number.isFinite(retryAfterSec) && retryAfterSec >= 0) {
        waitMs = Math.max(waitMs, retryAfterSec * 1000);
      }
    }

    // The 429 body is never read, so release it before issuing another
    // request. Best-effort: a failed cancel must not abort the retry.
    try {
      await response.body?.cancel();
    } catch {
      // Ignored on purpose; the retry proceeds regardless.
    }

    await sleep(waitMs);
    response = await singleApiAttempt(endpoint, body, serviceOrigin);
  }

  if (!response.ok) {
    let errorText;
    try {
      errorText = await response.text();
    } catch (parseError) {
      errorText = "Unable to parse error response";
    }
    throw new Error(
      `Perplexity API error: ${response.status} ${response.statusText}\n${errorText}`
    );
  }

  return response;
}

export async function consumeSSEStream(response: Response): Promise<ChatCompletionResponse> {
Expand Down