Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .changeset/mimo-provider-compat.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
---
"@moonshot-ai/kimi-code": patch
"@moonshot-ai/agent-core": patch
"@moonshot-ai/kosong": patch
---

Fix OpenAI-compatible provider requests for catalog-imported models with large context windows.
1 change: 1 addition & 0 deletions packages/agent-core/src/agent/compaction/full.ts
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,7 @@ export class FullCompaction {
provider: this.agent.config.provider,
budget: resolveCompletionBudget({
reservedContextSize: this.agent.kimiConfig?.loopControl?.reservedContextSize,
maxOutputSize: this.agent.config.modelMaxOutputSize,
}),
capability: this.agent.config.modelCapabilities,
});
Expand Down
4 changes: 4 additions & 0 deletions packages/agent-core/src/agent/config/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ export class ConfigState {
return this.tryResolvedProviderConfig()?.modelCapabilities ?? UNKNOWN_CAPABILITY;
}

/**
 * Output-size limit carried by the resolved provider config.
 *
 * @returns The `maxOutputSize` of the config returned by
 *   `tryResolvedProviderConfig()`, or `undefined` when that call yields
 *   nothing or the config has no `maxOutputSize`.
 */
get modelMaxOutputSize(): number | undefined {
  const resolved = this.tryResolvedProviderConfig();
  return resolved?.maxOutputSize;
}

private get resolvedProviderConfig(): ResolvedRuntimeProvider | undefined {
if (this._modelAlias === undefined) return undefined;
return this.agent.modelProvider?.resolveProviderConfig(this._modelAlias);
Expand Down
1 change: 1 addition & 0 deletions packages/agent-core/src/agent/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ export class Agent {
const loopControl = this.kimiConfig?.loopControl;
const completionBudgetConfig = resolveCompletionBudget({
reservedContextSize: loopControl?.reservedContextSize,
maxOutputSize: this.config.modelMaxOutputSize,
});
return new KosongLLM({
provider,
Expand Down
2 changes: 2 additions & 0 deletions packages/agent-core/src/session/provider-manager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export interface ResolvedRuntimeProvider {
readonly providerName: string;
readonly provider: KosongProviderConfig;
readonly modelCapabilities: ModelCapability;
readonly maxOutputSize?: number;
}

interface ProviderManagerOptions {
Expand Down Expand Up @@ -115,6 +116,7 @@ export class ProviderManager implements ModelProvider {
providerName,
provider,
modelCapabilities: resolveModelCapabilities(alias, provider),
maxOutputSize: alias.maxOutputSize,
};
}

Expand Down
12 changes: 11 additions & 1 deletion packages/agent-core/src/utils/completion-budget.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,12 @@ const MIN_FLOOR = 1;
const DEFAULT_UNKNOWN_CONTEXT_FALLBACK = 32000;

/**
* Resolve configured completion budget. Env values are explicit hard caps;
* Resolve configured completion budget. Env values override model aliases;
* non-positive env values disable clamping.
*/
export function resolveCompletionBudget(args: {
readonly reservedContextSize?: number;
readonly maxOutputSize?: number;
readonly env?: NodeJS.ProcessEnv;
}): CompletionBudgetConfig | undefined {
const env = args.env ?? process.env;
Expand All @@ -28,6 +29,9 @@ export function resolveCompletionBudget(args: {
if (fromLegacy !== 'absent') {
return fromLegacy === 'disabled' ? undefined : { hardCap: fromLegacy };
}
if (args.maxOutputSize !== undefined) {
return { hardCap: args.maxOutputSize };
}
if (args.reservedContextSize !== undefined && args.reservedContextSize > 0) {
return { fallback: args.reservedContextSize };
}
Expand Down Expand Up @@ -78,6 +82,12 @@ export function applyCompletionBudget(args: {
}): ChatProvider {
if (args.budget === undefined) return args.provider;
if (args.provider.withMaxCompletionTokens === undefined) return args.provider;
if (
args.budget.hardCap === undefined &&
args.provider.completionBudgetStrategy === 'explicit-only'
) {
return args.provider;
Comment on lines +85 to +89
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P2 Badge Honor max_output_size for explicit-only providers

When an openai/openai_responses model alias sets max_output_size, resolveCompletionBudget still produces only a fallback unless an env var is set, and toKosongProviderConfig only forwards maxOutputSize to Anthropic (packages/agent-core/src/session/provider-manager.ts:220-260). Because this early return fires before withMaxCompletionTokens, those explicit-only aliases drop the configured per-alias cap and send no max_tokens/max_output_tokens, leaving users unable to cap OpenAI-compatible providers that reject oversized output budgets.

Useful? React with 👍 / 👎.

}
const cap = computeCompletionBudgetCap({
budget: args.budget,
capability: args.capability,
Expand Down
52 changes: 51 additions & 1 deletion packages/agent-core/test/agent/config-state.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import { describe, expect, it } from 'vitest';
import { emptyUsage } from '@moonshot-ai/kosong';

import { ProviderManager } from '../../src/session/provider-manager';
import { testAgent } from './harness';
import type { GenerateFn } from '../../src/agent/turn/kosong-llm';

describe('ConfigState model capabilities', () => {
it('computes provider and model capabilities from ProviderManager metadata', () => {
Expand Down Expand Up @@ -73,7 +75,55 @@ describe('ConfigState model capabilities', () => {
});
});

it('uses session id as a provider prompt cache hint without storing it on Agent', () => {
it('applies model max output size to OpenAI-compatible providers', async () => {
  // Filled in by the stub below with whatever model parameters the agent
  // passes to the generate function.
  let generatedModelParameters: Record<string, unknown> | undefined;

  // Stub generate function: records the chat's model parameters and returns
  // an empty assistant response.
  const generate: GenerateFn = async (chat) => {
    const observed = chat as { readonly modelParameters?: Record<string, unknown> };
    generatedModelParameters = observed.modelParameters;
    return {
      id: 'response-1',
      message: { role: 'assistant', content: [], toolCalls: [] },
      usage: emptyUsage(),
      finishReason: 'completed',
      rawFinishReason: 'stop',
    };
  };

  // An `openai`-type provider whose alias pairs a 1M context window with an
  // explicit 8192 max output size.
  const providerManager = new ProviderManager({
    config: {
      providers: {
        openai: {
          type: 'openai',
          apiKey: 'sk-openai',
          baseUrl: 'https://openai.example/v1',
        },
      },
      models: {
        'gpt-alias': {
          provider: 'openai',
          model: 'gpt-runtime',
          maxContextSize: 1_000_000,
          maxOutputSize: 8192,
        },
      },
    },
  });
  const ctx = testAgent({ generate, providerManager });

  ctx.agent.config.update({ modelAlias: 'gpt-alias' });

  const { signal } = new AbortController();
  await ctx.agent.llm.chat({ messages: [], tools: [], signal });

  // The alias' max output size must surface as `max_tokens` on the request.
  expect(generatedModelParameters).toMatchObject({ max_tokens: 8192 });
});

it('uses session id as a provider prompt cache hint without storing it on Agent', () => {
const ctx = testAgent({
providerManager: new ProviderManager({
promptCacheKey: 'session-test',
Expand Down
36 changes: 36 additions & 0 deletions packages/agent-core/test/harness/runtime-provider.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,42 @@ describe('resolveRuntimeProvider maxOutputSize forwarding', () => {
model: 'claude-opus-4-7',
defaultMaxTokens: 24000,
});
expect(resolved.maxOutputSize).toBe(24000);
});

it('exposes alias.maxOutputSize for OpenAI-compatible completion budgets', () => {
  // Resolve an alias that points at an `openai`-type provider added on top of
  // the shared base config.
  const { provider, maxOutputSize } = resolveRuntimeProvider({
    config: {
      ...BASE_CONFIG,
      providers: {
        ...BASE_CONFIG.providers,
        openai: {
          type: 'openai',
          apiKey: 'sk-openai',
          baseUrl: 'https://openai.example/v1',
        },
      },
      models: {
        ...BASE_CONFIG.models!,
        'gpt-alias': {
          provider: 'openai',
          model: 'gpt-runtime',
          maxContextSize: 200000,
          maxOutputSize: 131072,
        },
      },
    },
    model: 'gpt-alias',
  });

  const expectedProvider = {
    type: 'openai',
    model: 'gpt-runtime',
    apiKey: 'sk-openai',
    baseUrl: 'https://openai.example/v1',
  };
  expect(provider).toMatchObject(expectedProvider);

  // The provider config itself must not gain `defaultMaxTokens`; the limit is
  // reported on the resolved result instead.
  expect('defaultMaxTokens' in provider).toBe(false);
  expect(maxOutputSize).toBe(131072);
});

it('omits defaultMaxTokens when alias.maxOutputSize is unset', () => {
Expand Down
38 changes: 38 additions & 0 deletions packages/agent-core/test/utils/completion-budget.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,35 @@ describe('applyCompletionBudget', () => {
expect(withMaxCompletionTokens.mock.calls[0]?.[0]).toBe(8192);
expect(result).not.toBe(original);
});

it('does not infer max tokens from fallback for explicit-only providers', () => {
  // Same provider as `original`, but flagged as explicit-only.
  const provider = {
    ...original,
    completionBudgetStrategy: 'explicit-only' as const,
  };

  // Only a fallback is supplied (no hard cap).
  const budgeted = applyCompletionBudget({
    provider,
    budget: { fallback: 32000 },
    capability: makeCapability(1048576),
  });

  // The very same provider instance comes back and no cap was applied.
  expect(budgeted).toBe(provider);
  expect(withMaxCompletionTokens).not.toHaveBeenCalled();
});

it('still applies an explicit hard cap for explicit-only providers', () => {
  // Same provider as `original`, but flagged as explicit-only.
  const provider = {
    ...original,
    completionBudgetStrategy: 'explicit-only' as const,
  };

  // This time the budget carries a hard cap.
  const budgeted = applyCompletionBudget({
    provider,
    budget: { hardCap: 8192 },
    capability: makeCapability(1048576),
  });

  // The cap is forwarded exactly once, with the configured value, and a
  // different provider object is returned.
  expect(withMaxCompletionTokens).toHaveBeenCalledOnce();
  const [firstCall] = withMaxCompletionTokens.mock.calls;
  expect(firstCall?.[0]).toBe(8192);
  expect(budgeted).not.toBe(provider);
});
});

describe('resolveCompletionBudget', () => {
Expand All @@ -172,6 +201,15 @@ describe('resolveCompletionBudget', () => {
expect(budget?.hardCap).toBe(2048);
});

it('uses model max output size as the hard cap when no env var is set', () => {
  // Empty env, with both a reserved context size and a model max output size
  // supplied: the expected result carries only the hard cap.
  const resolved = resolveCompletionBudget({
    env: {},
    maxOutputSize: 8192,
    reservedContextSize: 1000,
  });

  expect(resolved).toEqual({ hardCap: 8192 });
});

it('uses reservedContextSize as the unknown-context fallback when no env var is set', () => {
const budget = resolveCompletionBudget({
reservedContextSize: 12345,
Expand Down
8 changes: 8 additions & 0 deletions packages/kosong/src/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ export interface GenerateOptions {
onStreamEnd?: () => void;
}

/**
 * How a provider wants its max-completion cap chosen.
 *
 * - `'inferred-and-explicit'`: the provider accepts both configured caps and
 *   caps derived by the caller.
 * - `'explicit-only'`: the provider should only receive explicitly configured
 *   hard caps.
 */
export type CompletionBudgetStrategy =
  | 'inferred-and-explicit'
  | 'explicit-only';

/**
* In-memory video bytes for providers that require an uploaded file
* reference instead of an inline data URL.
Expand All @@ -133,6 +135,12 @@ export interface ChatProvider {
readonly name: string;
/** Model name passed to the upstream API (e.g. `"moonshot-v1-auto"`). */
readonly modelName: string;
/**
* Controls whether callers may derive a max-completion cap from catalog
* context windows. `explicit-only` providers should only receive configured
* hard caps because inferred context-sized caps can exceed their API limits.
*/
readonly completionBudgetStrategy?: CompletionBudgetStrategy;
/** Current thinking-effort level, or `null` if thinking is not configured. */
readonly thinkingEffort: ThinkingEffort | null;
/**
Expand Down
1 change: 1 addition & 0 deletions packages/kosong/src/providers/openai-legacy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -334,6 +334,7 @@ export class OpenAILegacyStreamedMessage implements StreamedMessage {
}
export class OpenAILegacyChatProvider implements ChatProvider {
readonly name: string = 'openai';
readonly completionBudgetStrategy = 'explicit-only';

private _model: string;
private _stream: boolean;
Expand Down
1 change: 1 addition & 0 deletions packages/kosong/src/providers/openai-responses.ts
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,7 @@ export class OpenAIResponsesStreamedMessage implements StreamedMessage {
}
export class OpenAIResponsesChatProvider implements ChatProvider {
readonly name: string = 'openai-responses';
readonly completionBudgetStrategy = 'explicit-only';

private _model: string;
private _stream: boolean;
Expand Down
4 changes: 4 additions & 0 deletions packages/kosong/test/openai-legacy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ const MUL_TOOL: Tool = {
};

describe('OpenAILegacyChatProvider', () => {
it('declares explicit-only completion budget strategy', () => {
  // A freshly created provider must advertise the explicit-only strategy.
  const provider = createProvider();
  expect(provider.completionBudgetStrategy).toBe('explicit-only');
});

describe('message conversion (COMMON_CASES)', () => {
it('simple user message with system prompt', async () => {
const provider = createProvider();
Expand Down
4 changes: 4 additions & 0 deletions packages/kosong/test/openai-responses.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,10 @@ const MUL_TOOL: Tool = {
};

describe('OpenAIResponsesChatProvider', () => {
it('declares explicit-only completion budget strategy', () => {
  // A freshly created provider must advertise the explicit-only strategy.
  const provider = createProvider();
  expect(provider.completionBudgetStrategy).toBe('explicit-only');
});

describe('message conversion', () => {
it('simple user message with system prompt', async () => {
const provider = createProvider();
Expand Down