From 2549cd8e290af47dc3ecef2c85380874fe60c547 Mon Sep 17 00:00:00 2001 From: yao Date: Mon, 1 Jun 2026 13:17:11 +0800 Subject: [PATCH 1/3] fix(ai): recover from invalid tool-call input instead of aborting the stream MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DurableAgent.executeTool threw when a tool call's arguments failed inputSchema validation (and no experimental_repairToolCall fixed it), aborting the whole agent stream — which fails the entire durable workflow run. Tool *execution* errors are already recovered (returned to the model as an error-text tool result so the agent can self-correct); this makes input parse/validation failures consistent: return the error as an error-text tool result instead of throwing, so a single occasionally-malformed model tool-call can no longer kill a long-running task. Aligns with AI SDK streamText behavior. Signed-off-by: yao --- .changeset/recover-invalid-tool-input.md | 7 ++ packages/ai/src/agent/durable-agent.test.ts | 76 +++++++++++++++++++++ packages/ai/src/agent/durable-agent.ts | 16 ++++- 3 files changed, 98 insertions(+), 1 deletion(-) create mode 100644 .changeset/recover-invalid-tool-input.md diff --git a/.changeset/recover-invalid-tool-input.md b/.changeset/recover-invalid-tool-input.md new file mode 100644 index 0000000000..65be97da97 --- /dev/null +++ b/.changeset/recover-invalid-tool-input.md @@ -0,0 +1,7 @@ +--- +'@workflow/ai': patch +--- + +DurableAgent: recover from invalid tool-call input instead of aborting the stream + +When a model emits a tool call whose arguments fail `inputSchema` validation (and no `experimental_repairToolCall` fixes it), `executeTool` now returns the validation error to the model as an `error-text` tool result — the same way tool *execution* errors are already handled — instead of throwing and aborting the whole agent stream. In a durable workflow that throw fails the entire run, so a single occasionally-malformed model tool-call could kill a long-running task with no chance for the agent to self-correct. The agent now sees the error as a tool result and can fix the arguments and retry within its step budget. diff --git a/packages/ai/src/agent/durable-agent.test.ts b/packages/ai/src/agent/durable-agent.test.ts index f78346ade2..2f3ff01343 100644 --- a/packages/ai/src/agent/durable-agent.test.ts +++ b/packages/ai/src/agent/durable-agent.test.ts @@ -2006,6 +2006,82 @@ describe('DurableAgent', () => { }); }); + it('should convert invalid tool input to error-text result instead of failing stream', async () => { + const tools: ToolSet = { + strictTool: { + description: 'A tool with a strict input schema', + inputSchema: z.object({ requiredField: z.string().min(1) }), + execute: async () => ({ ok: true }), + }, + }; + + const mockModel = createMockModel(); + + const agent = new DurableAgent({ + model: async () => mockModel, + tools, + }); + + const mockWritable = new WritableStream({ + write: vi.fn(), + close: vi.fn(), + }); + + const mockMessages: LanguageModelV3Prompt = [ + { role: 'user', content: [{ type: 'text', text: 'test' }] }, + ]; + + const { streamTextIterator } = await import('./stream-text-iterator.js'); + const mockIterator = { + next: vi + .fn() + .mockResolvedValueOnce({ + done: false, + value: { + toolCalls: [ + { + toolCallId: 'test-call-id', + toolName: 'strictTool', + // Valid JSON, but violates the schema (empty string fails .min(1)). + input: '{"requiredField":""}', + } as LanguageModelV3ToolCall, + ], + messages: mockMessages, + }, + }) + .mockResolvedValueOnce({ done: true, value: [] }), + }; + vi.mocked(streamTextIterator).mockReturnValue( + mockIterator as unknown as MockIterator + ); + + // Invalid tool input should be handled gracefully, not reject the stream. + await expect( + agent.stream({ + messages: [{ role: 'user', content: 'test' }], + writable: mockWritable, + }) + ).resolves.not.toThrow(); + + // Verify the validation error was sent back as an error-text tool result + // (so the model can correct its arguments and retry). + expect(mockIterator.next).toHaveBeenCalledTimes(2); + const toolResultsCall = mockIterator.next.mock.calls[1][0]; + expect(toolResultsCall).toBeDefined(); + expect(toolResultsCall).toHaveLength(1); + expect(toolResultsCall[0]).toMatchObject({ + type: 'tool-result', + toolCallId: 'test-call-id', + toolName: 'strictTool', + output: { + type: 'error-text', + }, + }); + expect(toolResultsCall[0].output.value).toContain( + 'Invalid input for tool "strictTool"' + ); + }); + it('should call onFinish with steps and messages when streaming completes', async () => { const mockModel = createMockModel(); diff --git a/packages/ai/src/agent/durable-agent.ts b/packages/ai/src/agent/durable-agent.ts index b3fbfe6e3c..c96ba9183a 100644 --- a/packages/ai/src/agent/durable-agent.ts +++ b/packages/ai/src/agent/durable-agent.ts @@ -1656,7 +1656,21 @@ async function executeTool( ); } } - throw parseError; + // Input that fails to parse or validate (even after repair) is recoverable, + // exactly like a tool execution error below: feed the error back to the model + // as an error-text result so the agent can correct the call and retry, instead + // of aborting the entire stream. This aligns with AI SDK's streamText behavior + // for tool failures. Reaches here both for malformed JSON and for the + // re-thrown "Invalid input for tool ..." schema-validation error above. + return { + type: 'tool-result' as const, + toolCallId: toolCall.toolCallId, + toolName: toolCall.toolName, + output: { + type: 'error-text' as const, + value: getErrorMessage(parseError), + }, + }; } return recordSpan({ From c045cdd969834760080b1005119581dc4894ab43 Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Mon, 29 Jun 2026 12:14:39 -0700 Subject: [PATCH 2/3] fix(ai): emit ai.toolCall span for recovered invalid tool input; test productive recovery Address review feedback on recovering from invalid tool-call input: - The invalid-input recovery path no longer threw, so it produced no ai.toolCall span (the execute()-error path still does). Emit a span here that records the validation error and ERROR status, so the recovered failure stays observable in traces even though it is intentionally not surfaced via onError (matching tool-execution errors and AI SDK). - Add a test that drives invalid -> corrected tool call and asserts the tool actually executes once with the fixed input, proving the agent productively self-corrects rather than only feeding the error back. Co-Authored-By: Claude Opus 4.8 (1M context) --- packages/ai/src/agent/durable-agent.test.ts | 78 +++++++++++++++++++++ packages/ai/src/agent/durable-agent.ts | 40 ++++++++--- 2 files changed, 110 insertions(+), 8 deletions(-) diff --git a/packages/ai/src/agent/durable-agent.test.ts b/packages/ai/src/agent/durable-agent.test.ts index 2f3ff01343..5e7040c920 100644 --- a/packages/ai/src/agent/durable-agent.test.ts +++ b/packages/ai/src/agent/durable-agent.test.ts @@ -2082,6 +2082,84 @@ describe('DurableAgent', () => { ); }); + it('should recover from invalid tool input and execute the corrected retry', async () => { + const execute = vi.fn(async () => ({ ok: true })); + const tools: ToolSet = { + strictTool: { + description: 'A tool with a strict input schema', + inputSchema: z.object({ requiredField: z.string().min(1) }), + execute, + }, + }; + + const mockModel = createMockModel(); + const agent = new DurableAgent({ model: async () => mockModel, tools }); + const mockWritable = new WritableStream({ + write: vi.fn(), + close: vi.fn(), + }); + const mockMessages: LanguageModelV3Prompt = [ + { role: 'user', content: [{ type: 'text', text: 'test' }] }, + ]; + + const makeToolCall = (input: string): LanguageModelV3ToolCall => ({ + toolCallId: 'test-call-id', + toolName: 'strictTool', + input, + }); + + const { streamTextIterator } = await import('./stream-text-iterator.js'); + const mockIterator = { + next: vi + .fn() + // Step 1: model emits invalid args (empty string fails .min(1)). + .mockResolvedValueOnce({ + done: false, + value: { + toolCalls: [makeToolCall('{"requiredField":""}')], + messages: mockMessages, + }, + }) + // Step 2: model corrects the args after seeing the error-text result. + .mockResolvedValueOnce({ + done: false, + value: { + toolCalls: [makeToolCall('{"requiredField":"ok"}')], + messages: mockMessages, + }, + }) + .mockResolvedValueOnce({ done: true, value: [] }), + }; + vi.mocked(streamTextIterator).mockReturnValue( + mockIterator as unknown as MockIterator + ); + + await expect( + agent.stream({ + messages: [{ role: 'user', content: 'test' }], + writable: mockWritable, + }) + ).resolves.not.toThrow(); + + // The tool must NOT run on the invalid call, and MUST run exactly once with + // the corrected input — proving the agent productively recovers, not just + // that the error was fed back. + expect(execute).toHaveBeenCalledTimes(1); + expect(execute.mock.calls[0][0]).toEqual({ requiredField: 'ok' }); + + // First turn fed back an error-text result; second turn produced a success. + expect(mockIterator.next).toHaveBeenCalledTimes(3); + const firstResults = mockIterator.next.mock.calls[1][0]; + expect(firstResults[0].output.type).toBe('error-text'); + const secondResults = mockIterator.next.mock.calls[2][0]; + expect(secondResults[0]).toMatchObject({ + type: 'tool-result', + toolCallId: 'test-call-id', + toolName: 'strictTool', + output: { type: 'json', value: { ok: true } }, + }); + }); + it('should call onFinish with steps and messages when streaming completes', async () => { const mockModel = createMockModel(); diff --git a/packages/ai/src/agent/durable-agent.ts b/packages/ai/src/agent/durable-agent.ts index c96ba9183a..9c0e6628a8 100644 --- a/packages/ai/src/agent/durable-agent.ts +++ b/packages/ai/src/agent/durable-agent.ts @@ -1662,15 +1662,39 @@ async function executeTool( // of aborting the entire stream. This aligns with AI SDK's streamText behavior // for tool failures. Reaches here both for malformed JSON and for the // re-thrown "Invalid input for tool ..." schema-validation error above. - return { - type: 'tool-result' as const, - toolCallId: toolCall.toolCallId, - toolName: toolCall.toolName, - output: { - type: 'error-text' as const, - value: getErrorMessage(parseError), + // + // This path intentionally does not reach `onError` (it no longer throws), + // matching the tool-execution-error path below. Emit an `ai.toolCall` span + // recording the failure so the recovered error stays observable in traces. + const parseErrorMessage = getErrorMessage(parseError); + return recordSpan({ + name: 'ai.toolCall', + telemetry, + attributes: { + 'ai.toolCall.name': toolCall.toolName, + 'ai.toolCall.id': toolCall.toolCallId, + ...(telemetry?.recordOutputs !== false && { + 'ai.toolCall.args': toolCall.input, + }), }, - }; + fn: (span) => { + if (span) { + // 2 === OTel SpanStatusCode.ERROR (inlined to avoid a hard dependency + // on the optional @opentelemetry/api package). + span.setStatus({ code: 2, message: parseErrorMessage }); + span.setAttributes({ 'ai.toolCall.error': parseErrorMessage }); + } + return { + type: 'tool-result' as const, + toolCallId: toolCall.toolCallId, + toolName: toolCall.toolName, + output: { + type: 'error-text' as const, + value: parseErrorMessage, + }, + }; + }, + }); } return recordSpan({ From e2266ddd690975e385ee1f4d17dfe4f40110544d Mon Sep 17 00:00:00 2001 From: Peter Wielander Date: Mon, 29 Jun 2026 13:16:51 -0700 Subject: [PATCH 3/3] chore(changeset): condense to a single sentence Co-Authored-By: Claude Opus 4.8 (1M context) --- .changeset/recover-invalid-tool-input.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.changeset/recover-invalid-tool-input.md b/.changeset/recover-invalid-tool-input.md index 65be97da97..e48258a9b1 100644 --- a/.changeset/recover-invalid-tool-input.md +++ b/.changeset/recover-invalid-tool-input.md @@ -2,6 +2,4 @@ '@workflow/ai': patch --- -DurableAgent: recover from invalid tool-call input instead of aborting the stream - -When a model emits a tool call whose arguments fail `inputSchema` validation (and no `experimental_repairToolCall` fixes it), `executeTool` now returns the validation error to the model as an `error-text` tool result — the same way tool *execution* errors are already handled — instead of throwing and aborting the whole agent stream. In a durable workflow that throw fails the entire run, so a single occasionally-malformed model tool-call could kill a long-running task with no chance for the agent to self-correct. The agent now sees the error as a tool result and can fix the arguments and retry within its step budget. +DurableAgent now recovers from invalid tool-call input by returning the validation error to the model instead of aborting the stream.