diff --git a/.changeset/mimo-provider-compat.md b/.changeset/mimo-provider-compat.md
new file mode 100644
index 00000000..c73928d5
--- /dev/null
+++ b/.changeset/mimo-provider-compat.md
@@ -0,0 +1,7 @@
+---
+"@moonshot-ai/kimi-code": patch
+"@moonshot-ai/agent-core": patch
+"@moonshot-ai/kosong": patch
+---
+
+Fix OpenAI-compatible provider requests for catalog-imported models with large context windows.
diff --git a/packages/agent-core/src/agent/compaction/full.ts b/packages/agent-core/src/agent/compaction/full.ts
index 3c52043a..cb768bbf 100644
--- a/packages/agent-core/src/agent/compaction/full.ts
+++ b/packages/agent-core/src/agent/compaction/full.ts
@@ -245,6 +245,7 @@ export class FullCompaction {
       provider: this.agent.config.provider,
       budget: resolveCompletionBudget({
         reservedContextSize: this.agent.kimiConfig?.loopControl?.reservedContextSize,
+        maxOutputSize: this.agent.config.modelMaxOutputSize,
       }),
       capability: this.agent.config.modelCapabilities,
     });
diff --git a/packages/agent-core/src/agent/config/index.ts b/packages/agent-core/src/agent/config/index.ts
index 45aca213..fba12bc5 100644
--- a/packages/agent-core/src/agent/config/index.ts
+++ b/packages/agent-core/src/agent/config/index.ts
@@ -127,6 +127,10 @@ export class ConfigState {
     return this.tryResolvedProviderConfig()?.modelCapabilities ?? UNKNOWN_CAPABILITY;
   }
 
+  get modelMaxOutputSize(): number | undefined {
+    return this.tryResolvedProviderConfig()?.maxOutputSize;
+  }
+
   private get resolvedProviderConfig(): ResolvedRuntimeProvider | undefined {
     if (this._modelAlias === undefined) return undefined;
     return this.agent.modelProvider?.resolveProviderConfig(this._modelAlias);
diff --git a/packages/agent-core/src/agent/index.ts b/packages/agent-core/src/agent/index.ts
index 8ee06f26..b2784b9e 100644
--- a/packages/agent-core/src/agent/index.ts
+++ b/packages/agent-core/src/agent/index.ts
@@ -210,6 +210,7 @@ export class Agent {
     const loopControl = this.kimiConfig?.loopControl;
     const completionBudgetConfig = resolveCompletionBudget({
       reservedContextSize: loopControl?.reservedContextSize,
+      maxOutputSize: this.config.modelMaxOutputSize,
     });
     return new KosongLLM({
       provider,
diff --git a/packages/agent-core/src/session/provider-manager.ts b/packages/agent-core/src/session/provider-manager.ts
index f675c126..97b0586d 100644
--- a/packages/agent-core/src/session/provider-manager.ts
+++ b/packages/agent-core/src/session/provider-manager.ts
@@ -17,6 +17,7 @@ export interface ResolvedRuntimeProvider {
   readonly providerName: string;
   readonly provider: KosongProviderConfig;
   readonly modelCapabilities: ModelCapability;
+  readonly maxOutputSize?: number;
 }
 
 interface ProviderManagerOptions {
@@ -115,6 +116,7 @@ export class ProviderManager implements ModelProvider {
       providerName,
       provider,
       modelCapabilities: resolveModelCapabilities(alias, provider),
+      maxOutputSize: alias.maxOutputSize,
     };
   }
 
diff --git a/packages/agent-core/src/utils/completion-budget.ts b/packages/agent-core/src/utils/completion-budget.ts
index 5136ec60..118a9565 100644
--- a/packages/agent-core/src/utils/completion-budget.ts
+++ b/packages/agent-core/src/utils/completion-budget.ts
@@ -12,11 +12,12 @@ const MIN_FLOOR = 1;
 const DEFAULT_UNKNOWN_CONTEXT_FALLBACK = 32000;
 
 /**
- * Resolve configured completion budget. Env values are explicit hard caps;
+ * Resolve configured completion budget. Env values override model aliases;
  * non-positive env values disable clamping.
  */
 export function resolveCompletionBudget(args: {
   readonly reservedContextSize?: number;
+  readonly maxOutputSize?: number;
   readonly env?: NodeJS.ProcessEnv;
 }): CompletionBudgetConfig | undefined {
   const env = args.env ?? process.env;
@@ -28,6 +29,10 @@ export function resolveCompletionBudget(args: {
   if (fromLegacy !== 'absent') {
     return fromLegacy === 'disabled' ? undefined : { hardCap: fromLegacy };
   }
+  // Ignore non-positive alias values, mirroring the reservedContextSize guard below.
+  if (args.maxOutputSize !== undefined && args.maxOutputSize > 0) {
+    return { hardCap: args.maxOutputSize };
+  }
   if (args.reservedContextSize !== undefined && args.reservedContextSize > 0) {
     return { fallback: args.reservedContextSize };
   }
@@ -78,6 +83,12 @@ export function applyCompletionBudget(args: {
 }): ChatProvider {
   if (args.budget === undefined) return args.provider;
   if (args.provider.withMaxCompletionTokens === undefined) return args.provider;
+  if (
+    args.budget.hardCap === undefined &&
+    args.provider.completionBudgetStrategy === 'explicit-only'
+  ) {
+    return args.provider;
+  }
   const cap = computeCompletionBudgetCap({
     budget: args.budget,
     capability: args.capability,
diff --git a/packages/agent-core/test/agent/config-state.test.ts b/packages/agent-core/test/agent/config-state.test.ts
index f130f57c..2252f909 100644
--- a/packages/agent-core/test/agent/config-state.test.ts
+++ b/packages/agent-core/test/agent/config-state.test.ts
@@ -1,7 +1,9 @@
 import { describe, expect, it } from 'vitest';
+import { emptyUsage } from '@moonshot-ai/kosong';
 
 import { ProviderManager } from '../../src/session/provider-manager';
 import { testAgent } from './harness';
+import type { GenerateFn } from '../../src/agent/turn/kosong-llm';
 
 describe('ConfigState model capabilities', () => {
   it('computes provider and model capabilities from ProviderManager metadata', () => {
@@ -73,7 +75,55 @@ describe('ConfigState model capabilities', () => {
     });
   });
 
-it('uses session id as a provider prompt cache hint without storing it on Agent', () => {
+  it('applies model max output size to OpenAI-compatible providers', async () => {
+    let generatedModelParameters: Record<string, unknown> | undefined;
+    const generate: GenerateFn = async (chat) => {
+      generatedModelParameters = (
+        chat as { readonly modelParameters?: Record<string, unknown> }
+      ).modelParameters;
+      return {
+        id: 'response-1',
+        message: { role: 'assistant', content: [], toolCalls: [] },
+        usage: emptyUsage(),
+        finishReason: 'completed',
+        rawFinishReason: 'stop',
+      };
+    };
+    const ctx = testAgent({
+      generate,
+      providerManager: new ProviderManager({
+        config: {
+          providers: {
+            openai: {
+              type: 'openai',
+              apiKey: 'sk-openai',
+              baseUrl: 'https://openai.example/v1',
+            },
+          },
+          models: {
+            'gpt-alias': {
+              provider: 'openai',
+              model: 'gpt-runtime',
+              maxContextSize: 1_000_000,
+              maxOutputSize: 8192,
+            },
+          },
+        },
+      }),
+    });
+
+    ctx.agent.config.update({ modelAlias: 'gpt-alias' });
+
+    await ctx.agent.llm.chat({
+      messages: [],
+      tools: [],
+      signal: new AbortController().signal,
+    });
+
+    expect(generatedModelParameters).toMatchObject({ max_tokens: 8192 });
+  });
+
+  it('uses session id as a provider prompt cache hint without storing it on Agent', () => {
     const ctx = testAgent({
       providerManager: new ProviderManager({
         promptCacheKey: 'session-test',
diff --git a/packages/agent-core/test/harness/runtime-provider.test.ts b/packages/agent-core/test/harness/runtime-provider.test.ts
index 6ef2c3c7..46af73bb 100644
--- a/packages/agent-core/test/harness/runtime-provider.test.ts
+++ b/packages/agent-core/test/harness/runtime-provider.test.ts
@@ -253,6 +253,42 @@ describe('resolveRuntimeProvider maxOutputSize forwarding', () => {
       model: 'claude-opus-4-7',
       defaultMaxTokens: 24000,
     });
+    expect(resolved.maxOutputSize).toBe(24000);
+  });
+
+  it('exposes alias.maxOutputSize for OpenAI-compatible completion budgets', () => {
+    const resolved = resolveRuntimeProvider({
+      config: {
+        ...BASE_CONFIG,
+        providers: {
+          ...BASE_CONFIG.providers,
+          openai: {
+            type: 'openai',
+            apiKey: 'sk-openai',
+            baseUrl: 'https://openai.example/v1',
+          },
+        },
+        models: {
+          ...BASE_CONFIG.models!,
+          'gpt-alias': {
+            provider: 'openai',
+            model: 'gpt-runtime',
+            maxContextSize: 200000,
+            maxOutputSize: 131072,
+          },
+        },
+      },
+      model: 'gpt-alias',
+    });
+
+    expect(resolved.provider).toMatchObject({
+      type: 'openai',
+      model: 'gpt-runtime',
+      apiKey: 'sk-openai',
+      baseUrl: 'https://openai.example/v1',
+    });
+    expect('defaultMaxTokens' in resolved.provider).toBe(false);
+    expect(resolved.maxOutputSize).toBe(131072);
   });
 
   it('omits defaultMaxTokens when alias.maxOutputSize is unset', () => {
diff --git a/packages/agent-core/test/utils/completion-budget.test.ts b/packages/agent-core/test/utils/completion-budget.test.ts
index 9b75f5f1..6cba2cc2 100644
--- a/packages/agent-core/test/utils/completion-budget.test.ts
+++ b/packages/agent-core/test/utils/completion-budget.test.ts
@@ -150,6 +150,35 @@ describe('applyCompletionBudget', () => {
     expect(withMaxCompletionTokens.mock.calls[0]?.[0]).toBe(8192);
     expect(result).not.toBe(original);
   });
+
+  it('does not infer max tokens from fallback for explicit-only providers', () => {
+    const explicitOnlyProvider = {
+      ...original,
+      completionBudgetStrategy: 'explicit-only' as const,
+    };
+    const result = applyCompletionBudget({
+      provider: explicitOnlyProvider,
+      budget: { fallback: 32000 },
+      capability: makeCapability(1048576),
+    });
+    expect(result).toBe(explicitOnlyProvider);
+    expect(withMaxCompletionTokens).not.toHaveBeenCalled();
+  });
+
+  it('still applies an explicit hard cap for explicit-only providers', () => {
+    const explicitOnlyProvider = {
+      ...original,
+      completionBudgetStrategy: 'explicit-only' as const,
+    };
+    const result = applyCompletionBudget({
+      provider: explicitOnlyProvider,
+      budget: { hardCap: 8192 },
+      capability: makeCapability(1048576),
+    });
+    expect(withMaxCompletionTokens).toHaveBeenCalledOnce();
+    expect(withMaxCompletionTokens.mock.calls[0]?.[0]).toBe(8192);
+    expect(result).not.toBe(explicitOnlyProvider);
+  });
 });
 
 describe('resolveCompletionBudget', () => {
@@ -172,6 +201,24 @@ describe('resolveCompletionBudget', () => {
     expect(budget?.hardCap).toBe(2048);
   });
 
+  it('uses model max output size as the hard cap when no env var is set', () => {
+    const budget = resolveCompletionBudget({
+      reservedContextSize: 1000,
+      maxOutputSize: 8192,
+      env: {},
+    });
+    expect(budget).toEqual({ hardCap: 8192 });
+  });
+
+  it('ignores a non-positive model max output size', () => {
+    const budget = resolveCompletionBudget({
+      reservedContextSize: 1000,
+      maxOutputSize: 0,
+      env: {},
+    });
+    expect(budget).toEqual({ fallback: 1000 });
+  });
+
   it('uses reservedContextSize as the unknown-context fallback when no env var is set', () => {
     const budget = resolveCompletionBudget({
       reservedContextSize: 12345,
diff --git a/packages/kosong/src/provider.ts b/packages/kosong/src/provider.ts
index c5d7624f..0ed90ff8 100644
--- a/packages/kosong/src/provider.ts
+++ b/packages/kosong/src/provider.ts
@@ -110,6 +110,8 @@ export interface GenerateOptions {
   onStreamEnd?: () => void;
 }
 
+export type CompletionBudgetStrategy = 'inferred-and-explicit' | 'explicit-only';
+
 /**
  * In-memory video bytes for providers that require an uploaded file
  * reference instead of an inline data URL.
@@ -133,6 +135,12 @@ export interface ChatProvider {
   readonly name: string;
   /** Model name passed to the upstream API (e.g. `"moonshot-v1-auto"`). */
   readonly modelName: string;
+  /**
+   * Controls whether callers may derive a max-completion cap from catalog
+   * context windows. `explicit-only` providers should only receive configured
+   * hard caps because inferred context-sized caps can exceed their API limits.
+   */
+  readonly completionBudgetStrategy?: CompletionBudgetStrategy;
   /** Current thinking-effort level, or `null` if thinking is not configured. */
   readonly thinkingEffort: ThinkingEffort | null;
   /**
diff --git a/packages/kosong/src/providers/openai-legacy.ts b/packages/kosong/src/providers/openai-legacy.ts
index e050e9ed..7d944ccd 100644
--- a/packages/kosong/src/providers/openai-legacy.ts
+++ b/packages/kosong/src/providers/openai-legacy.ts
@@ -334,6 +334,7 @@ export class OpenAILegacyStreamedMessage implements StreamedMessage {
 }
 export class OpenAILegacyChatProvider implements ChatProvider {
   readonly name: string = 'openai';
+  readonly completionBudgetStrategy = 'explicit-only';
 
   private _model: string;
   private _stream: boolean;
diff --git a/packages/kosong/src/providers/openai-responses.ts b/packages/kosong/src/providers/openai-responses.ts
index 59c0c852..702c2c12 100644
--- a/packages/kosong/src/providers/openai-responses.ts
+++ b/packages/kosong/src/providers/openai-responses.ts
@@ -847,6 +847,7 @@ export class OpenAIResponsesStreamedMessage implements StreamedMessage {
 }
 export class OpenAIResponsesChatProvider implements ChatProvider {
   readonly name: string = 'openai-responses';
+  readonly completionBudgetStrategy = 'explicit-only';
 
   private _model: string;
   private _stream: boolean;
diff --git a/packages/kosong/test/openai-legacy.test.ts b/packages/kosong/test/openai-legacy.test.ts
index d51f8d8a..8d2e7aae 100644
--- a/packages/kosong/test/openai-legacy.test.ts
+++ b/packages/kosong/test/openai-legacy.test.ts
@@ -90,6 +90,10 @@ const MUL_TOOL: Tool = {
 };
 
 describe('OpenAILegacyChatProvider', () => {
+  it('declares explicit-only completion budget strategy', () => {
+    expect(createProvider().completionBudgetStrategy).toBe('explicit-only');
+  });
+
   describe('message conversion (COMMON_CASES)', () => {
     it('simple user message with system prompt', async () => {
       const provider = createProvider();
diff --git a/packages/kosong/test/openai-responses.test.ts b/packages/kosong/test/openai-responses.test.ts
index 572527cb..01c4d071 100644
--- a/packages/kosong/test/openai-responses.test.ts
+++ b/packages/kosong/test/openai-responses.test.ts
@@ -90,6 +90,10 @@ const MUL_TOOL: Tool = {
 };
 
 describe('OpenAIResponsesChatProvider', () => {
+  it('declares explicit-only completion budget strategy', () => {
+    expect(createProvider().completionBudgetStrategy).toBe('explicit-only');
+  });
+
   describe('message conversion', () => {
     it('simple user message with system prompt', async () => {
       const provider = createProvider();