Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


def _make_eval_result():
"""Creates an EvaluationResult with representative data for loss analysis."""
"""Creates an EvaluationResult with rubric verdicts for loss analysis."""
return types.EvaluationResult(
eval_case_results=[
types.EvalCaseResult(
Expand All @@ -32,6 +32,19 @@ def _make_eval_result():
"multi_turn_task_success_v1": types.EvalCaseMetricResult(
score=0.0,
explanation="Failed tool invocation",
rubric_verdicts=[
types.evals.RubricVerdict(
evaluated_rubric=types.evals.Rubric(
rubric_id="tool_invocation",
content=types.evals.RubricContent(
property=types.evals.RubricContentProperty(
description="The agent should invoke the find_flights tool with the correct parameters.",
)
),
),
verdict=False,
)
],
)
},
)
Expand Down
80 changes: 80 additions & 0 deletions tests/unit/vertexai/genai/test_evals.py
Original file line number Diff line number Diff line change
Expand Up @@ -441,6 +441,86 @@ def test_sanitize_agent_data_skips_error_payload(self):
assert "error" not in sanitized
assert sanitized == {}

def test_t_inline_results_strips_none_tool_fields(self):
    """Checks that t_inline_results omits None-valued tool fields (e.g. file_search)."""
    # --- Evaluation outcome: one case, one candidate, one failed metric. ---
    failed_metric = common_types.EvalCaseMetricResult(
        score=0.0,
        explanation="Failed",
    )
    candidate_result = common_types.ResponseCandidateResult(
        response_index=0,
        metric_results={"multi_turn_task_success_v1": failed_metric},
    )
    case_result = common_types.EvalCaseResult(
        eval_case_index=0,
        response_candidate_results=[candidate_result],
    )

    # --- Agent definition: a tool that only sets function_declarations, so
    # every other Tool field is left unset. ---
    search_declaration = genai_types.FunctionDeclaration(
        name="search",
        description="Searches the web.",
    )
    search_tool = genai_types.Tool(function_declarations=[search_declaration])
    helper_agent = vertexai_genai_types.evals.AgentConfig(
        agent_id="agent_0",
        agent_type="LlmAgent",
        instruction="You are a helper.",
        tools=[search_tool],
    )

    # --- Conversation: a single user event in turn 0. ---
    user_event = vertexai_genai_types.evals.AgentEvent(
        author="user",
        content=genai_types.Content(
            parts=[genai_types.Part(text="Hi")],
        ),
    )
    first_turn = vertexai_genai_types.evals.ConversationTurn(
        turn_index=0,
        events=[user_event],
    )

    agent_case = common_types.EvalCase(
        agent_data=vertexai_genai_types.evals.AgentData(
            agents={"agent_0": helper_agent},
            turns=[first_turn],
        )
    )
    dataset = common_types.EvaluationDataset(eval_cases=[agent_case])
    run_metadata = common_types.EvaluationRunMetadata(
        candidate_names=["candidate-1"]
    )

    eval_result = common_types.EvaluationResult(
        eval_case_results=[case_result],
        evaluation_dataset=[dataset],
        metadata=run_metadata,
    )

    payload = _transformers.t_inline_results([eval_result])
    assert len(payload) == 1

    # Drill down to the serialized config of the only agent.
    serialized_agents = payload[0]["request"]["prompt"]["agent_data"]["agents"]
    agent_config = serialized_agents["agent_0"]
    assert "tools" in agent_config
    tool = agent_config["tools"][0]

    # function_declarations should be preserved
    assert "function_declarations" in tool
    assert tool["function_declarations"][0]["name"] == "search"

    # Gemini-API-only fields must NOT be present (they would be None)
    for absent_field in (
        "file_search",
        "mcp_servers",
        "google_search",
        "code_execution",
    ):
        assert absent_field not in tool

def test_t_inline_results_skips_error_agent_data_in_df(self):
"""Tests that t_inline_results skips error agent_data from DataFrame."""
error_json = json.dumps({"error": "Agent run failed"})
Expand Down
4 changes: 2 additions & 2 deletions vertexai/_genai/_transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -422,7 +422,7 @@ def t_inline_results(
if agent_data:
if hasattr(agent_data, "model_dump"):
prompt_payload["agent_data"] = _sanitize_agent_data(
agent_data.model_dump()
agent_data.model_dump(exclude_none=True)
)
elif isinstance(agent_data, dict):
prompt_payload["agent_data"] = _sanitize_agent_data(agent_data)
Expand All @@ -442,7 +442,7 @@ def t_inline_results(
if df_agent_data is not None:
if hasattr(df_agent_data, "model_dump"):
prompt_payload["agent_data"] = _sanitize_agent_data(
df_agent_data.model_dump()
df_agent_data.model_dump(exclude_none=True)
)
elif isinstance(df_agent_data, str):
try:
Expand Down
Loading