diff --git a/tests/unit/vertexai/genai/test_evals.py b/tests/unit/vertexai/genai/test_evals.py
index cec51f0df3..92e463ccf5 100644
--- a/tests/unit/vertexai/genai/test_evals.py
+++ b/tests/unit/vertexai/genai/test_evals.py
@@ -122,15 +122,21 @@ def mock_api_client_fixture():
 @pytest.fixture
 def mock_eval_dependencies(mock_api_client_fixture):
-    with mock.patch("google.cloud.storage.Client") as mock_storage_client, mock.patch(
-        "google.cloud.bigquery.Client"
-    ) as mock_bq_client, mock.patch(
-        "vertexai._genai.evals.Evals.evaluate_instances"
-    ) as mock_evaluate_instances, mock.patch(
-        "vertexai._genai._gcs_utils.GcsUtils.upload_json_to_prefix"
-    ) as mock_upload_to_gcs, mock.patch(
-        "vertexai._genai._evals_metric_loaders.LazyLoadedPrebuiltMetric._fetch_and_parse"
-    ) as mock_fetch_prebuilt_metric:
+    # fmt: off
+    with (
+        mock.patch("google.cloud.storage.Client") as mock_storage_client,
+        mock.patch("google.cloud.bigquery.Client") as mock_bq_client,
+        mock.patch(
+            "vertexai._genai.evals.Evals.evaluate_instances"
+        ) as mock_evaluate_instances,
+        mock.patch(
+            "vertexai._genai._gcs_utils.GcsUtils.upload_json_to_prefix"
+        ) as mock_upload_to_gcs,
+        mock.patch(
+            "vertexai._genai._evals_metric_loaders.LazyLoadedPrebuiltMetric._fetch_and_parse"
+        ) as mock_fetch_prebuilt_metric,
+    ):
+    # fmt: on

         def mock_evaluate_instances_side_effect(*args, **kwargs):
             metric_config = kwargs.get("metric_config", {})
@@ -3386,14 +3392,8 @@ def test_run_inference_with_agent_engine_falls_back_to_managed_sessions_api(
         assert inference_result.candidate_name == "agent_engine_0"

     @mock.patch.object(_evals_utils, "EvalDatasetLoader")
-    @mock.patch("vertexai._genai._evals_common.InMemorySessionService")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.Runner")
-    @mock.patch("vertexai._genai._evals_common.LlmAgent")
     def test_run_inference_with_local_agent(
         self,
-        mock_llm_agent,
-        mock_runner,
-        mock_session_service,
         mock_eval_dataset_loader,
     ):
         mock_df = pd.DataFrame(
@@ -3421,8 +3421,15 @@ def test_run_inference_with_local_agent(
         mock_agent_instance.instruction = "mock instruction"
         mock_agent_instance.tools = []
         mock_agent_instance.sub_agents = []
-        mock_llm_agent.return_value = mock_agent_instance
+
+        # Mock ADK modules for lazy imports in _execute_local_agent_run_with_retry_async
+        mock_session_service = mock.MagicMock()
         mock_session_service.return_value.create_session = mock.AsyncMock()
+        mock_runner = mock.MagicMock()
+        mock_adk_sessions_module = mock.MagicMock()
+        mock_adk_sessions_module.InMemorySessionService = mock_session_service
+        mock_adk_runners_module = mock.MagicMock()
+        mock_adk_runners_module.Runner = mock_runner
         mock_runner_instance = mock_runner.return_value
         stream_run_return_value_1 = [
             mock.Mock(
@@ -3473,10 +3480,19 @@ def run_async_side_effect(*args, **kwargs):

         mock_runner_instance.run_async.side_effect = run_async_side_effect

-        inference_result = self.client.evals.run_inference(
-            agent=mock_agent_instance,
-            src=mock_df,
-        )
+        with mock.patch.dict(
+            sys.modules,
+            {
+                "google.adk": mock.MagicMock(),
+                "google.adk.sessions": mock_adk_sessions_module,
+                "google.adk.runners": mock_adk_runners_module,
+                "google.adk.agents": mock.MagicMock(),
+            },
+        ):
+            inference_result = self.client.evals.run_inference(
+                agent=mock_agent_instance,
+                src=mock_df,
+            )

         mock_eval_dataset_loader.return_value.load.assert_called_once_with(mock_df)
         assert mock_session_service.call_count == 2
@@ -3602,11 +3618,13 @@ def test_run_inference_with_litellm_string_prompt_format(
         mock_api_client_fixture,
     ):
         """Tests inference with LiteLLM using a simple prompt string."""
+        # fmt: off
         with mock.patch(
             "vertexai._genai._evals_common.litellm"
         ) as mock_litellm, mock.patch(
             "vertexai._genai._evals_common._call_litellm_completion"
         ) as mock_call_litellm_completion:
+        # fmt: on
             mock_litellm.utils.get_valid_models.return_value = ["gpt-4o"]
             prompt_df = pd.DataFrame([{"prompt": "What is LiteLLM?"}])
             expected_messages = [{"role": "user", "content": "What is LiteLLM?"}]
@@ -3658,11 +3676,16 @@ def test_run_inference_with_litellm_openai_request_format(
         mock_api_client_fixture,
     ):
         """Tests inference with LiteLLM where the row contains a chat completion request body."""
-        with mock.patch(
-            "vertexai._genai._evals_common.litellm"
-        ) as mock_litellm, mock.patch(
-            "vertexai._genai._evals_common._call_litellm_completion"
-        ) as mock_call_litellm_completion:
+        # fmt: off
+        with (
+            mock.patch(
+                "vertexai._genai._evals_common.litellm"
+            ) as mock_litellm,
+            mock.patch(
+                "vertexai._genai._evals_common._call_litellm_completion"
+            ) as mock_call_litellm_completion,
+        ):
+        # fmt: on
             mock_litellm.utils.get_valid_models.return_value = ["gpt-4o"]
             prompt_df = pd.DataFrame(
                 [
@@ -4178,21 +4201,23 @@ def test_run_agent_internal_multi_turn_with_agent(self, mock_run_agent):
         ]
         assert "mock_agent" in agent_data["agents"]

-    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.ConversationScenario")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")  # fmt: skip
     @pytest.mark.asyncio
-    async def test_run_adk_user_simulation_with_intermediate_events(
-        self,
-        mock_config,
-        mock_scenario,
-        mock_simulator,
-        mock_generator,
-        mock_session_input,
-    ):
+    async def test_run_adk_user_simulation_with_intermediate_events(self):
         """Tests that intermediate invocation events (e.g. tool calls) are parsed successfully."""
+        mock_scenario = mock.MagicMock()
+        mock_config = mock.MagicMock()
+        mock_simulator = mock.MagicMock()
+        mock_generator = mock.MagicMock()
+        mock_session_input = mock.MagicMock()
+        mock_adk_eval_scenarios = mock.MagicMock()
+        mock_adk_eval_scenarios.ConversationScenario = mock_scenario
+        mock_adk_eval_case = mock.MagicMock()
+        mock_adk_eval_case.SessionInput = mock_session_input
+        mock_adk_eval_generator = mock.MagicMock()
+        mock_adk_eval_generator.EvaluationGenerator = mock_generator
+        mock_adk_simulator_module = mock.MagicMock()
+        mock_adk_simulator_module.LlmBackedUserSimulator = mock_simulator
+        mock_adk_simulator_module.LlmBackedUserSimulatorConfig = mock_config
         row = pd.Series(
             {
                 "starting_prompt": "I want a laptop.",
@@ -4245,7 +4270,19 @@ async def test_run_adk_user_simulation_with_intermediate_events(
         mock_generator._generate_inferences_from_root_agent = mock.AsyncMock(
             return_value=[mock_invocation]
         )
-        turns = await _evals_common._run_adk_user_simulation(row, mock_agent)
+        with mock.patch.dict(
+            sys.modules,
+            {
+                "google.adk": mock.MagicMock(),
+                "google.adk.evaluation": mock.MagicMock(),
+                "google.adk.evaluation.conversation_scenarios": mock_adk_eval_scenarios,
+                "google.adk.evaluation.eval_case": mock_adk_eval_case,
+                "google.adk.evaluation.evaluation_generator": mock_adk_eval_generator,
+                "google.adk.evaluation.simulation": mock.MagicMock(),
+                "google.adk.evaluation.simulation.llm_backed_user_simulator": mock_adk_simulator_module,
+            },
+        ):
+            turns = await _evals_common._run_adk_user_simulation(row, mock_agent)

         assert len(turns) == 1
         turn = turns[0]
@@ -7086,20 +7123,50 @@ def test_build_request_payload_tool_use_quality_v1_with_agent_data_tool_call(
 class TestRunAdkUserSimulation:
     """Unit tests for the _run_adk_user_simulation function."""

-    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.ConversationScenario")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")  # fmt: skip
+    def _build_adk_mock_modules(self):
+        """Builds mock ADK modules for lazy imports in _run_adk_user_simulation."""
+        mock_scenario_cls = mock.MagicMock()
+        mock_config_cls = mock.MagicMock()
+        mock_simulator_cls = mock.MagicMock()
+        mock_generator_cls = mock.MagicMock()
+        mock_session_input_cls = mock.MagicMock()
+        mock_modules = {
+            "google.adk": mock.MagicMock(),
+            "google.adk.evaluation": mock.MagicMock(),
+            "google.adk.evaluation.conversation_scenarios": mock.MagicMock(
+                ConversationScenario=mock_scenario_cls
+            ),
+            "google.adk.evaluation.eval_case": mock.MagicMock(
+                SessionInput=mock_session_input_cls
+            ),
+            "google.adk.evaluation.evaluation_generator": mock.MagicMock(
+                EvaluationGenerator=mock_generator_cls
+            ),
+            "google.adk.evaluation.simulation": mock.MagicMock(),
+            "google.adk.evaluation.simulation.llm_backed_user_simulator": mock.MagicMock(
+                LlmBackedUserSimulator=mock_simulator_cls,
+                LlmBackedUserSimulatorConfig=mock_config_cls,
+            ),
+        }
+        return (
+            mock_modules,
+            mock_scenario_cls,
+            mock_config_cls,
+            mock_simulator_cls,
+            mock_generator_cls,
+            mock_session_input_cls,
+        )
+
     @pytest.mark.asyncio
-    async def test_run_adk_user_simulation_success(
-        self,
-        mock_config_cls,
-        mock_scenario_cls,
-        mock_simulator_cls,
-        mock_generator_cls,
-        mock_session_input_cls,
-    ):
+    async def test_run_adk_user_simulation_success(self):
+        (
+            mock_modules,
+            mock_scenario_cls,
+            _,
+            _,
+            mock_generator_cls,
+            mock_session_input_cls,
+        ) = self._build_adk_mock_modules()
         row = pd.Series(
             {
                 "starting_prompt": "start",
@@ -7119,7 +7186,8 @@ async def test_run_adk_user_simulation_success(
             return_value=[mock_invocation]
         )

-        turns = await _evals_common._run_adk_user_simulation(row, mock_agent)
+        with mock.patch.dict(sys.modules, mock_modules):
+            turns = await _evals_common._run_adk_user_simulation(row, mock_agent)

         assert len(turns) == 1
         turn = turns[0]
@@ -7138,40 +7206,26 @@ async def test_run_adk_user_simulation_success(
         )
         mock_session_input_cls.assert_called_once()

-    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.ConversationScenario")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")  # fmt: skip
     @pytest.mark.asyncio
-    async def test_run_adk_user_simulation_missing_columns(
-        self,
-        mock_config_cls,
-        mock_scenario_cls,
-        mock_simulator_cls,
-        mock_generator_cls,
-        mock_session_input_cls,
-    ):
+    async def test_run_adk_user_simulation_missing_columns(self):
+        mock_modules, _, _, _, _, _ = self._build_adk_mock_modules()
         row = pd.Series({"conversation_plan": "plan"})
         mock_agent = mock.Mock()

-        with pytest.raises(ValueError, match="User simulation requires"):
-            await _evals_common._run_adk_user_simulation(row, mock_agent)
+        with mock.patch.dict(sys.modules, mock_modules):
+            with pytest.raises(ValueError, match="User simulation requires"):
+                await _evals_common._run_adk_user_simulation(row, mock_agent)

-    @mock.patch("vertexai._genai._evals_common.ADK_SessionInput")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.EvaluationGenerator")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulator")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.ConversationScenario")  # fmt: skip
-    @mock.patch("vertexai._genai._evals_common.LlmBackedUserSimulatorConfig")  # fmt: skip
     @pytest.mark.asyncio
-    async def test_run_adk_user_simulation_missing_session_inputs(
-        self,
-        mock_config_cls,
-        mock_scenario_cls,
-        mock_simulator_cls,
-        mock_generator_cls,
-        mock_session_input_cls,
-    ):
+    async def test_run_adk_user_simulation_missing_session_inputs(self):
+        (
+            mock_modules,
+            mock_scenario_cls,
+            _,
+            _,
+            mock_generator_cls,
+            mock_session_input_cls,
+        ) = self._build_adk_mock_modules()
         row = pd.Series(
             {
                 "starting_prompt": "start",
@@ -7190,7 +7244,8 @@ async def test_run_adk_user_simulation_missing_session_inputs(
             return_value=[mock_invocation]
        )

-        await _evals_common._run_adk_user_simulation(row, mock_agent)
+        with mock.patch.dict(sys.modules, mock_modules):
+            await _evals_common._run_adk_user_simulation(row, mock_agent)

         mock_scenario_cls.assert_called_once_with(
             starting_prompt="start",
diff --git a/vertexai/_genai/_evals_common.py b/vertexai/_genai/_evals_common.py
index 72c2fc6eda..4bbd3d35b9 100644
--- a/vertexai/_genai/_evals_common.py
+++ b/vertexai/_genai/_evals_common.py
@@ -54,32 +54,6 @@
 except ImportError:
     litellm = None

-try:
-    from google.adk.agents import LlmAgent
-    from google.adk.runners import Runner
-    from google.adk.sessions import InMemorySessionService
-    from google.adk.evaluation.simulation.llm_backed_user_simulator import (
-        LlmBackedUserSimulator,
-    )
-    from google.adk.evaluation.simulation.llm_backed_user_simulator import (
-        LlmBackedUserSimulatorConfig,
-    )
-    from google.adk.evaluation.conversation_scenarios import ConversationScenario
-    from google.adk.evaluation.evaluation_generator import EvaluationGenerator
-    from google.adk.evaluation.eval_case import SessionInput as ADK_SessionInput
-except ImportError:
-    logging.getLogger(__name__).warning(
-        "ADK is not installed. Please install it using" " 'pip install google-adk'"
-    )
-    LlmAgent = None
-    Runner = None
-    InMemorySessionService = None
-    LlmBackedUserSimulator = None
-    LlmBackedUserSimulatorConfig = None
-    ConversationScenario = None
-    EvaluationGenerator = None
-    ADK_SessionInput = None
-

 _thread_local_data = threading.local()
@@ -501,7 +475,7 @@ def _execute_inference_concurrently(
     gemini_config: Optional[genai_types.GenerateContentConfig] = None,
     inference_fn: Optional[Callable[..., Any]] = None,
     agent_engine: Optional[Union[str, types.AgentEngine]] = None,
-    agent: Optional[LlmAgent] = None,
+    agent: Optional["LlmAgent"] = None,  # type: ignore  # noqa: F821
     user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
 ) -> list[
     Union[
@@ -979,10 +953,22 @@ def _run_inference_internal(

 async def _run_adk_user_simulation(
     row: pd.Series,
-    agent: LlmAgent,
+    agent: "LlmAgent",  # type: ignore  # noqa: F821
     config: Optional[types.evals.UserSimulatorConfig] = None,
 ) -> list[dict[str, Any]]:
     """Runs a multi-turn user simulation using ADK's EvaluationGenerator."""
+    # Lazy-import ADK dependencies to avoid top-level import failures when
+    # google-adk is not installed.
+    from google.adk.evaluation.conversation_scenarios import ConversationScenario
+    from google.adk.evaluation.eval_case import SessionInput as ADK_SessionInput
+    from google.adk.evaluation.evaluation_generator import EvaluationGenerator
+    from google.adk.evaluation.simulation.llm_backed_user_simulator import (
+        LlmBackedUserSimulator,
+    )
+    from google.adk.evaluation.simulation.llm_backed_user_simulator import (
+        LlmBackedUserSimulatorConfig,
+    )
+
     starting_prompt = row.get("starting_prompt")
     conversation_plan = row.get("conversation_plan")
     user_persona = "EVALUATOR"
@@ -1169,7 +1155,7 @@ def _execute_inference(
     src: Union[str, pd.DataFrame],
     model: Optional[Union[Callable[[Any], Any], str]] = None,
     agent_engine: Optional[Union[str, types.AgentEngine]] = None,
-    agent: Optional[LlmAgent] = None,
+    agent: Optional["LlmAgent"] = None,  # type: ignore  # noqa: F821
     dest: Optional[str] = None,
     config: Optional[genai_types.GenerateContentConfig] = None,
     prompt_template: Optional[Union[str, types.PromptTemplateOrDict]] = None,
@@ -1859,7 +1845,7 @@ def _create_agent_results_dataframe(
 def _run_agent_internal(
     api_client: BaseApiClient,
     agent_engine: Optional[Union[str, types.AgentEngine]],
-    agent: Optional[LlmAgent],
+    agent: Optional["LlmAgent"],  # type: ignore  # noqa: F821
     prompt_dataset: pd.DataFrame,
     user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
     allow_cross_region_model: bool = False,
@@ -1910,7 +1896,7 @@ def _run_agent_internal(
 def _run_agent(
     api_client: BaseApiClient,
     agent_engine: Optional[Union[str, types.AgentEngine]],
-    agent: Optional[LlmAgent],
+    agent: Optional["LlmAgent"],  # type: ignore  # noqa: F821
     prompt_dataset: pd.DataFrame,
     user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
     allow_cross_region_model: bool = False,
@@ -1933,9 +1919,9 @@ def _run_agent(
             raise ValueError(
                 f"The model '{model_name}' is currently only available in the"
                 " 'global' region. Because this request originated in"
-                f" '{current_location}', you must explicitly set "
-                "allow_cross_region_model=True to allow your data to be routed outside"
-                " of your request's region."
+                f" '{current_location}', you must explicitly set"
+                " allow_cross_region_model=True to allow your data to be routed"
+                " outside of your request's region."
             )

         logger.warning(
@@ -2106,7 +2092,7 @@ def _execute_agent_run_with_retry(
 def _execute_local_agent_run_with_retry(
     row: pd.Series,
     contents: Union[genai_types.ContentListUnion, genai_types.ContentListUnionDict],
-    agent: LlmAgent,
+    agent: "LlmAgent",  # type: ignore  # noqa: F821
     max_retries: int = 3,
     user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
 ) -> Union[list[dict[str, Any]], dict[str, Any]]:
@@ -2121,11 +2107,15 @@ async def _execute_local_agent_run_with_retry_async(
     row: pd.Series,
     contents: Union[genai_types.ContentListUnion, genai_types.ContentListUnionDict],
-    agent: LlmAgent,
+    agent: "LlmAgent",  # type: ignore  # noqa: F821
     max_retries: int = 3,
     user_simulator_config: Optional[types.evals.UserSimulatorConfig] = None,
 ) -> Union[list[dict[str, Any]], dict[str, Any]]:
     """Executes agent run locally for a single prompt asynchronously."""
+    # Lazy-import ADK dependencies to avoid top-level import failures when
+    # google-adk is not installed.
+    from google.adk.runners import Runner
+    from google.adk.sessions import InMemorySessionService

     # Multi-turn agent scraping with user simulation.
     if user_simulator_config or "conversation_plan" in row:
@@ -2616,8 +2606,8 @@ def _get_content(row: dict[str, Any], column: str) -> Optional[genai_types.Conte
         return cast(genai_types.Content, row[column])
     else:
         raise ValueError(
-            f"{column} must be a string or a Content object. "
-            f"Got {type(row[column])}."
+            f"{column} must be a string or a Content object. Got"
+            f" {type(row[column])}."
         )
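Note on the testing pattern above: because _evals_common now imports ADK inside the functions that need it, the tests can no longer patch module-level attributes and instead pre-populate sys.modules with stub modules before the lazy import runs. Below is a minimal, self-contained sketch of that technique; the wrapper function run_simulation and its behavior are hypothetical stand-ins, not part of this patch, while the google.adk module paths mirror the ones used above.

import sys
from unittest import mock


def run_simulation(prompt):
    # Lazy import: resolved only when the function runs, so importing the
    # enclosing module never fails when google-adk is absent.
    from google.adk.evaluation.evaluation_generator import EvaluationGenerator

    return EvaluationGenerator().generate(prompt)


def test_run_simulation_without_adk_installed():
    mock_generator_cls = mock.MagicMock()
    mock_generator_cls.return_value.generate.return_value = "ok"
    mock_modules = {
        # Stub every package level on the import path so the lazy
        # "from ... import ..." statement resolves against the stubs.
        "google.adk": mock.MagicMock(),
        "google.adk.evaluation": mock.MagicMock(),
        "google.adk.evaluation.evaluation_generator": mock.MagicMock(
            EvaluationGenerator=mock_generator_cls
        ),
    }
    # mock.patch.dict restores sys.modules on exit, so the stubs do not
    # leak into other tests in the same process.
    with mock.patch.dict(sys.modules, mock_modules):
        assert run_simulation("hello") == "ok"
    mock_generator_cls.return_value.generate.assert_called_once_with("hello")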