From 2549cd8e290af47dc3ecef2c85380874fe60c547 Mon Sep 17 00:00:00 2001
From: yao <zhangyaoruo@outlook.com>
Date: Mon, 1 Jun 2026 13:17:11 +0800
Subject: [PATCH 1/3] fix(ai): recover from invalid tool-call input instead of
 aborting the stream
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The executeTool helper used by DurableAgent threw when a tool call's arguments
could not be parsed or failed inputSchema validation (and no
experimental_repairToolCall fixed them). That throw aborted the whole agent
stream, which in turn fails the entire durable workflow run. Tool *execution*
errors are already recovered: they are returned to the model as an error-text
tool result so the agent can self-correct. This change makes input
parse/validation failures consistent with that path: the error is returned as
an error-text tool result instead of being thrown, so a single occasionally
malformed model tool call can no longer kill a long-running task. This aligns
with AI SDK streamText behavior.

Signed-off-by: yao <zhangyaoruo@outlook.com>
---
 .changeset/recover-invalid-tool-input.md    |  7 ++
 packages/ai/src/agent/durable-agent.test.ts | 76 +++++++++++++++++++++
 packages/ai/src/agent/durable-agent.ts      | 16 ++++-
 3 files changed, 98 insertions(+), 1 deletion(-)
 create mode 100644 .changeset/recover-invalid-tool-input.md

diff --git a/.changeset/recover-invalid-tool-input.md b/.changeset/recover-invalid-tool-input.md
new file mode 100644
index 0000000000..65be97da97
--- /dev/null
+++ b/.changeset/recover-invalid-tool-input.md
@@ -0,0 +1,7 @@
+---
+'@workflow/ai': patch
+---
+
+DurableAgent: recover from invalid tool-call input instead of aborting the stream
+
+When a model emits a tool call whose arguments fail `inputSchema` validation (and no `experimental_repairToolCall` fixes it), `executeTool` now returns the validation error to the model as an `error-text` tool result — the same way tool *execution* errors are already handled — instead of throwing and aborting the whole agent stream. In a durable workflow that throw fails the entire run, so a single occasionally-malformed model tool-call could kill a long-running task with no chance for the agent to self-correct. The agent now sees the error as a tool result and can fix the arguments and retry within its step budget.
diff --git a/packages/ai/src/agent/durable-agent.test.ts b/packages/ai/src/agent/durable-agent.test.ts
index f78346ade2..2f3ff01343 100644
--- a/packages/ai/src/agent/durable-agent.test.ts
+++ b/packages/ai/src/agent/durable-agent.test.ts
@@ -2006,6 +2006,82 @@ describe('DurableAgent', () => {
       });
     });
 
+    it('should convert invalid tool input to error-text result instead of failing stream', async () => {
+      const tools: ToolSet = {
+        strictTool: {
+          description: 'A tool with a strict input schema',
+          inputSchema: z.object({ requiredField: z.string().min(1) }),
+          execute: async () => ({ ok: true }),
+        },
+      };
+
+      const mockModel = createMockModel();
+
+      const agent = new DurableAgent({
+        model: async () => mockModel,
+        tools,
+      });
+
+      const mockWritable = new WritableStream({
+        write: vi.fn(),
+        close: vi.fn(),
+      });
+
+      const mockMessages: LanguageModelV3Prompt = [
+        { role: 'user', content: [{ type: 'text', text: 'test' }] },
+      ];
+
+      const { streamTextIterator } = await import('./stream-text-iterator.js');
+      const mockIterator = {
+        next: vi
+          .fn()
+          .mockResolvedValueOnce({
+            done: false,
+            value: {
+              toolCalls: [
+                {
+                  toolCallId: 'test-call-id',
+                  toolName: 'strictTool',
+                  // Valid JSON, but violates the schema (empty string fails .min(1)).
+                  input: '{"requiredField":""}',
+                } as LanguageModelV3ToolCall,
+              ],
+              messages: mockMessages,
+            },
+          })
+          .mockResolvedValueOnce({ done: true, value: [] }),
+      };
+      vi.mocked(streamTextIterator).mockReturnValue(
+        mockIterator as unknown as MockIterator
+      );
+
+      // Invalid tool input should be handled gracefully, not reject the stream.
+      await expect(
+        agent.stream({
+          messages: [{ role: 'user', content: 'test' }],
+          writable: mockWritable,
+        })
+      ).resolves.not.toThrow();
+
+      // Verify the validation error was sent back as an error-text tool result
+      // (so the model can correct its arguments and retry).
+      expect(mockIterator.next).toHaveBeenCalledTimes(2);
+      const toolResultsCall = mockIterator.next.mock.calls[1][0];
+      expect(toolResultsCall).toBeDefined();
+      expect(toolResultsCall).toHaveLength(1);
+      expect(toolResultsCall[0]).toMatchObject({
+        type: 'tool-result',
+        toolCallId: 'test-call-id',
+        toolName: 'strictTool',
+        output: {
+          type: 'error-text',
+        },
+      });
+      expect(toolResultsCall[0].output.value).toContain(
+        'Invalid input for tool "strictTool"'
+      );
+    });
+
     it('should call onFinish with steps and messages when streaming completes', async () => {
       const mockModel = createMockModel();
 
diff --git a/packages/ai/src/agent/durable-agent.ts b/packages/ai/src/agent/durable-agent.ts
index b3fbfe6e3c..c96ba9183a 100644
--- a/packages/ai/src/agent/durable-agent.ts
+++ b/packages/ai/src/agent/durable-agent.ts
@@ -1656,7 +1656,21 @@ async function executeTool(
         );
       }
     }
-    throw parseError;
+    // Input that fails to parse or validate (even after repair) is recoverable,
+    // exactly like a tool execution error below: feed the error back to the model
+    // as an error-text result so the agent can correct the call and retry, instead
+    // of aborting the entire stream. This aligns with AI SDK's streamText behavior
+    // for tool failures. Reaches here both for malformed JSON and for the
+    // re-thrown "Invalid input for tool ..." schema-validation error above.
+    return {
+      type: 'tool-result' as const,
+      toolCallId: toolCall.toolCallId,
+      toolName: toolCall.toolName,
+      output: {
+        type: 'error-text' as const,
+        value: getErrorMessage(parseError),
+      },
+    };
   }
 
   return recordSpan({

From c045cdd969834760080b1005119581dc4894ab43 Mon Sep 17 00:00:00 2001
From: Peter Wielander <peter.wielander@vercel.com>
Date: Mon, 29 Jun 2026 12:14:39 -0700
Subject: [PATCH 2/3] fix(ai): emit ai.toolCall span for recovered invalid tool
 input; test productive recovery

Address review feedback on recovering from invalid tool-call input:

- The invalid-input recovery path no longer threw, so it produced no
  ai.toolCall span (the execute()-error path still does). Emit a span here
  that records the validation error and ERROR status, so the recovered
  failure stays observable in traces even though it is intentionally not
  surfaced via onError (matching tool-execution errors and AI SDK).
- Add a test that drives invalid -> corrected tool call and asserts the
  tool actually executes once with the fixed input, proving the agent
  productively self-corrects rather than only feeding the error back.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 packages/ai/src/agent/durable-agent.test.ts | 78 +++++++++++++++++++++
 packages/ai/src/agent/durable-agent.ts      | 40 ++++++++---
 2 files changed, 110 insertions(+), 8 deletions(-)

diff --git a/packages/ai/src/agent/durable-agent.test.ts b/packages/ai/src/agent/durable-agent.test.ts
index 2f3ff01343..5e7040c920 100644
--- a/packages/ai/src/agent/durable-agent.test.ts
+++ b/packages/ai/src/agent/durable-agent.test.ts
@@ -2082,6 +2082,84 @@ describe('DurableAgent', () => {
       );
     });
 
+    it('should recover from invalid tool input and execute the corrected retry', async () => {
+      const execute = vi.fn(async () => ({ ok: true }));
+      const tools: ToolSet = {
+        strictTool: {
+          description: 'A tool with a strict input schema',
+          inputSchema: z.object({ requiredField: z.string().min(1) }),
+          execute,
+        },
+      };
+
+      const mockModel = createMockModel();
+      const agent = new DurableAgent({ model: async () => mockModel, tools });
+      const mockWritable = new WritableStream({
+        write: vi.fn(),
+        close: vi.fn(),
+      });
+      const mockMessages: LanguageModelV3Prompt = [
+        { role: 'user', content: [{ type: 'text', text: 'test' }] },
+      ];
+
+      const makeToolCall = (input: string): LanguageModelV3ToolCall => ({
+        toolCallId: 'test-call-id',
+        toolName: 'strictTool',
+        input,
+      });
+
+      const { streamTextIterator } = await import('./stream-text-iterator.js');
+      const mockIterator = {
+        next: vi
+          .fn()
+          // Step 1: model emits invalid args (empty string fails .min(1)).
+          .mockResolvedValueOnce({
+            done: false,
+            value: {
+              toolCalls: [makeToolCall('{"requiredField":""}')],
+              messages: mockMessages,
+            },
+          })
+          // Step 2: model corrects the args after seeing the error-text result.
+          .mockResolvedValueOnce({
+            done: false,
+            value: {
+              toolCalls: [makeToolCall('{"requiredField":"ok"}')],
+              messages: mockMessages,
+            },
+          })
+          .mockResolvedValueOnce({ done: true, value: [] }),
+      };
+      vi.mocked(streamTextIterator).mockReturnValue(
+        mockIterator as unknown as MockIterator
+      );
+
+      await expect(
+        agent.stream({
+          messages: [{ role: 'user', content: 'test' }],
+          writable: mockWritable,
+        })
+      ).resolves.not.toThrow();
+
+      // The tool must NOT run on the invalid call, and MUST run exactly once with
+      // the corrected input — proving the agent productively recovers, not just
+      // that the error was fed back.
+      expect(execute).toHaveBeenCalledTimes(1);
+      expect(execute.mock.calls[0][0]).toEqual({ requiredField: 'ok' });
+
+      // First turn fed back an error-text result; second turn produced a success.
+      expect(mockIterator.next).toHaveBeenCalledTimes(3);
+      const firstResults = mockIterator.next.mock.calls[1][0];
+      expect(firstResults[0].output.type).toBe('error-text');
+      const secondResults = mockIterator.next.mock.calls[2][0];
+      expect(secondResults[0]).toMatchObject({
+        type: 'tool-result',
+        toolCallId: 'test-call-id',
+        toolName: 'strictTool',
+        output: { type: 'json', value: { ok: true } },
+      });
+    });
+
     it('should call onFinish with steps and messages when streaming completes', async () => {
       const mockModel = createMockModel();
 
diff --git a/packages/ai/src/agent/durable-agent.ts b/packages/ai/src/agent/durable-agent.ts
index c96ba9183a..9c0e6628a8 100644
--- a/packages/ai/src/agent/durable-agent.ts
+++ b/packages/ai/src/agent/durable-agent.ts
@@ -1662,15 +1662,39 @@ async function executeTool(
     // of aborting the entire stream. This aligns with AI SDK's streamText behavior
     // for tool failures. Reaches here both for malformed JSON and for the
     // re-thrown "Invalid input for tool ..." schema-validation error above.
-    return {
-      type: 'tool-result' as const,
-      toolCallId: toolCall.toolCallId,
-      toolName: toolCall.toolName,
-      output: {
-        type: 'error-text' as const,
-        value: getErrorMessage(parseError),
+    //
+    // This path intentionally does not reach `onError` (it no longer throws),
+    // matching the tool-execution-error path below. Emit an `ai.toolCall` span
+    // recording the failure so the recovered error stays observable in traces.
+    const parseErrorMessage = getErrorMessage(parseError);
+    return recordSpan({
+      name: 'ai.toolCall',
+      telemetry,
+      attributes: {
+        'ai.toolCall.name': toolCall.toolName,
+        'ai.toolCall.id': toolCall.toolCallId,
+        ...(telemetry?.recordOutputs !== false && {
+          'ai.toolCall.args': toolCall.input,
+        }),
       },
-    };
+      fn: (span) => {
+        if (span) {
+          // 2 === OTel SpanStatusCode.ERROR (inlined to avoid a hard dependency
+          // on the optional @opentelemetry/api package).
+          span.setStatus({ code: 2, message: parseErrorMessage });
+          span.setAttributes({ 'ai.toolCall.error': parseErrorMessage });
+        }
+        return {
+          type: 'tool-result' as const,
+          toolCallId: toolCall.toolCallId,
+          toolName: toolCall.toolName,
+          output: {
+            type: 'error-text' as const,
+            value: parseErrorMessage,
+          },
+        };
+      },
+    });
   }
 
   return recordSpan({

From e2266ddd690975e385ee1f4d17dfe4f40110544d Mon Sep 17 00:00:00 2001
From: Peter Wielander <peter.wielander@vercel.com>
Date: Mon, 29 Jun 2026 13:16:51 -0700
Subject: [PATCH 3/3] chore(changeset): condense to a single sentence

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
---
 .changeset/recover-invalid-tool-input.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.changeset/recover-invalid-tool-input.md b/.changeset/recover-invalid-tool-input.md
index 65be97da97..e48258a9b1 100644
--- a/.changeset/recover-invalid-tool-input.md
+++ b/.changeset/recover-invalid-tool-input.md
@@ -2,6 +2,4 @@
 '@workflow/ai': patch
 ---
 
-DurableAgent: recover from invalid tool-call input instead of aborting the stream
-
-When a model emits a tool call whose arguments fail `inputSchema` validation (and no `experimental_repairToolCall` fixes it), `executeTool` now returns the validation error to the model as an `error-text` tool result — the same way tool *execution* errors are already handled — instead of throwing and aborting the whole agent stream. In a durable workflow that throw fails the entire run, so a single occasionally-malformed model tool-call could kill a long-running task with no chance for the agent to self-correct. The agent now sees the error as a tool result and can fix the arguments and retry within its step budget.
+DurableAgent now recovers from invalid tool-call input (malformed JSON or arguments that fail `inputSchema` validation) by returning the error to the model as an `error-text` tool result instead of aborting the stream.