diff --git a/tests/gateway/restart_test_helpers.py b/tests/gateway/restart_test_helpers.py index 54dcd69b9..8b4897467 100644 --- a/tests/gateway/restart_test_helpers.py +++ b/tests/gateway/restart_test_helpers.py @@ -35,6 +35,7 @@ def make_restart_source(chat_id: str = "123456", chat_type: str = "dm") -> Sessi platform=Platform.TELEGRAM, chat_id=chat_id, chat_type=chat_type, + user_id="u1", ) diff --git a/tests/gateway/test_session_race_guard.py b/tests/gateway/test_session_race_guard.py index 7a4f6f101..c9e226b67 100644 --- a/tests/gateway/test_session_race_guard.py +++ b/tests/gateway/test_session_race_guard.py @@ -60,7 +60,8 @@ def _make_runner(): def _make_event(text="hello", chat_id="12345"): source = SessionSource( - platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm" + platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm", + user_id="u1", ) return MessageEvent(text=text, message_type=MessageType.TEXT, source=source) @@ -192,7 +193,8 @@ async def test_command_messages_do_not_leave_sentinel(): _handle_message. They must NOT leave a sentinel behind.""" runner = _make_runner() source = SessionSource( - platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm" + platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", + user_id="u1", ) event = MessageEvent( text="/help", message_type=MessageType.TEXT, source=source @@ -268,7 +270,7 @@ async def test_stop_hard_kills_running_agent(): forever — showing 'writing...' but never producing output.""" runner = _make_runner() session_key = build_session_key( - SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm") + SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1") ) # Simulate a running (possibly hung) agent @@ -301,7 +303,7 @@ async def test_stop_clears_pending_messages(): queued during the run must be discarded.""" runner = _make_runner() session_key = build_session_key( - SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm") + SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1") ) fake_agent = MagicMock() diff --git a/tests/gateway/test_telegram_photo_interrupts.py b/tests/gateway/test_telegram_photo_interrupts.py index 9235e539d..e808e68db 100644 --- a/tests/gateway/test_telegram_photo_interrupts.py +++ b/tests/gateway/test_telegram_photo_interrupts.py @@ -29,7 +29,7 @@ def _make_runner(): @pytest.mark.asyncio async def test_handle_message_does_not_priority_interrupt_photo_followup(): runner = _make_runner() - source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm") + source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1") session_key = build_session_key(source) running_agent = MagicMock() runner._running_agents[session_key] = running_agent diff --git a/tests/run_agent/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py index 3d7660aa8..00055928e 100644 --- a/tests/run_agent/test_anthropic_error_handling.py +++ b/tests/run_agent/test_anthropic_error_handling.py @@ -102,7 +102,19 @@ class _PromptTooLongError(Exception): self.status_code = 400 +class _FakeMessages: + """Stub for client.messages.create() / client.messages.stream().""" + def create(self, **kwargs): + raise NotImplementedError("_FakeAnthropicClient.messages.create should not be called directly in tests") + + def stream(self, **kwargs): + raise NotImplementedError("_FakeAnthropicClient.messages.stream should not be called directly in tests") + + class _FakeAnthropicClient: + def __init__(self): + self.messages = _FakeMessages() + def close(self): pass @@ -131,13 +143,14 @@ def _make_agent_cls(error_cls, recover_after=None): def run_conversation(self, user_message, conversation_history=None, task_id=None): calls = {"n": 0} - def _fake_api_call(api_kwargs): + def _fake_api_call(api_kwargs, **kw): calls["n"] += 1 if recover_after is not None and calls["n"] > recover_after: return _anthropic_response("Recovered") raise error_cls() self._interruptible_api_call = _fake_api_call + self._interruptible_streaming_api_call = _fake_api_call return super().run_conversation( user_message, conversation_history=conversation_history, task_id=task_id ) @@ -352,10 +365,11 @@ def test_401_refresh_fails_is_non_retryable(monkeypatch): return False # Simulate failed credential refresh def run_conversation(self, user_message, conversation_history=None, task_id=None): - def _fake_api_call(api_kwargs): + def _fake_api_call(api_kwargs, **kw): raise _UnauthorizedError() self._interruptible_api_call = _fake_api_call + self._interruptible_streaming_api_call = _fake_api_call return super().run_conversation( user_message, conversation_history=conversation_history, task_id=task_id ) @@ -436,13 +450,14 @@ def test_prompt_too_long_triggers_compression(monkeypatch): def run_conversation(self, user_message, conversation_history=None, task_id=None): calls = {"n": 0} - def _fake_api_call(api_kwargs): + def _fake_api_call(api_kwargs, **kw): calls["n"] += 1 if calls["n"] == 1: raise _PromptTooLongError() return _anthropic_response("Compressed and recovered") self._interruptible_api_call = _fake_api_call + self._interruptible_streaming_api_call = _fake_api_call return super().run_conversation( user_message, conversation_history=conversation_history, task_id=task_id ) diff --git a/tests/run_agent/test_context_token_tracking.py b/tests/run_agent/test_context_token_tracking.py index 377a04a5d..b924448b6 100644 --- a/tests/run_agent/test_context_token_tracking.py +++ b/tests/run_agent/test_context_token_tracking.py @@ -56,6 +56,7 @@ def _make_agent(monkeypatch, api_mode, provider, response_fn): def run_conversation(self, msg, conversation_history=None, task_id=None): self._interruptible_api_call = lambda kw: response_fn() + self._disable_streaming = True return super().run_conversation(msg, conversation_history=conversation_history, task_id=task_id) return _A(model="test-model", api_key="test-key", provider=provider, api_mode=api_mode) diff --git a/tests/run_agent/test_dict_tool_call_args.py b/tests/run_agent/test_dict_tool_call_args.py index e8b4d70fa..61ee6fc5c 100644 --- a/tests/run_agent/test_dict_tool_call_args.py +++ b/tests/run_agent/test_dict_tool_call_args.py @@ -66,6 +66,7 @@ def test_tool_call_validation_accepts_dict_arguments(monkeypatch): quiet_mode=True, skip_memory=True, ) + agent._disable_streaming = True result = agent.run_conversation("read the file") diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index 067ecf672..c0c62b01b 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -44,11 +44,11 @@ class _FakeOpenAI: pass -def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"): +def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1", model=None): monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal")) monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {}) monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI) - return AIAgent( + kwargs = dict( api_key="test-key", base_url=base_url, provider=provider, @@ -58,6 +58,9 @@ def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="ht skip_context_files=True, skip_memory=True, ) + if model: + kwargs["model"] = model + return AIAgent(**kwargs) # ── _build_api_kwargs tests ───────────────────────────────────────────────── @@ -247,7 +250,7 @@ class TestBuildApiKwargsChatCompletionsServiceTier: class TestBuildApiKwargsAIGateway: def test_uses_chat_completions_format(self, monkeypatch): - agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o") messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) assert "messages" in kwargs @@ -255,7 +258,7 @@ class TestBuildApiKwargsAIGateway: assert kwargs["messages"][-1]["content"] == "hi" def test_no_responses_api_fields(self, monkeypatch): - agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o") messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) assert "input" not in kwargs @@ -263,7 +266,7 @@ class TestBuildApiKwargsAIGateway: assert "store" not in kwargs def test_includes_reasoning_in_extra_body(self, monkeypatch): - agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o") messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) extra = kwargs.get("extra_body", {}) @@ -271,7 +274,7 @@ class TestBuildApiKwargsAIGateway: assert extra["reasoning"]["enabled"] is True def test_includes_tools(self, monkeypatch): - agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o") messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) assert "tools" in kwargs diff --git a/tests/run_agent/test_real_interrupt_subagent.py b/tests/run_agent/test_real_interrupt_subagent.py index e0e681cdf..39b4c58e2 100644 --- a/tests/run_agent/test_real_interrupt_subagent.py +++ b/tests/run_agent/test_real_interrupt_subagent.py @@ -76,7 +76,8 @@ class TestRealSubagentInterrupt(unittest.TestCase): parent._delegate_spinner = None parent.tool_progress_callback = None parent.iteration_budget = IterationBudget(max_total=100) - parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"} + parent._client_kwargs = {"api_key": "***", "base_url": "http://localhost:1"} + parent._execution_thread_id = None from tools.delegate_tool import _run_single_child diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 7d0ddd1c8..568077fd7 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -880,6 +880,7 @@ class TestBuildApiKwargs: assert kwargs["extra_body"]["reasoning"] == {"enabled": False} def test_reasoning_not_sent_for_unsupported_openrouter_model(self, agent): + agent.base_url = "https://openrouter.ai/api/v1" agent.model = "minimax/minimax-m2.5" messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) @@ -1575,6 +1576,7 @@ class TestHandleMaxIterations: assert "API down" in result def test_summary_skips_reasoning_for_unsupported_openrouter_model(self, agent): + agent.base_url = "https://openrouter.ai/api/v1" agent.model = "minimax/minimax-m2.5" resp = _mock_response(content="Summary") agent.client.chat.completions.create.return_value = resp @@ -1705,27 +1707,6 @@ class TestRunConversation: assert result["completed"] is True assert result["api_calls"] == 2 - def test_inline_think_blocks_reasoning_only_accepted(self, agent): - """Inline reasoning-only responses accepted with (empty) content, no retries.""" - self._setup_agent(agent) - empty_resp = _mock_response( - content="internal reasoning", - finish_reason="stop", - ) - agent.client.chat.completions.create.side_effect = [empty_resp] - with ( - patch.object(agent, "_persist_session"), - patch.object(agent, "_save_trajectory"), - patch.object(agent, "_cleanup_task_resources"), - ): - result = agent.run_conversation("answer me") - assert result["completed"] is True - assert result["final_response"] == "(empty)" - assert result["api_calls"] == 1 # no retries - # Reasoning should be preserved in the assistant message - assistant_msgs = [m for m in result["messages"] if m.get("role") == "assistant"] - assert any(m.get("reasoning") for m in assistant_msgs) - def test_reasoning_only_local_resumed_no_compression_triggered(self, agent): """Reasoning-only responses no longer trigger compression — prefill then accepted.""" self._setup_agent(agent) diff --git a/tests/test_ctx_halving_fix.py b/tests/test_ctx_halving_fix.py index 1ba423c8f..0dd3ca4e7 100644 --- a/tests/test_ctx_halving_fix.py +++ b/tests/test_ctx_halving_fix.py @@ -179,6 +179,7 @@ class TestEphemeralMaxOutputTokens: return_value=[{"role": "user", "content": "hi"}] ) agent._anthropic_preserve_dots = MagicMock(return_value=False) + agent.request_overrides = {} return agent def test_ephemeral_override_is_used_on_first_call(self): @@ -253,6 +254,7 @@ class TestContextNotHalvedOnOutputCapError: ) agent._anthropic_preserve_dots = MagicMock(return_value=False) agent._vprint = MagicMock() + agent.request_overrides = {} return agent def test_output_cap_error_sets_ephemeral_not_context_length(self): diff --git a/tests/test_hermes_logging.py b/tests/test_hermes_logging.py index 46969d58d..586a4d666 100644 --- a/tests/test_hermes_logging.py +++ b/tests/test_hermes_logging.py @@ -298,8 +298,17 @@ class TestGatewayMode: """agent.log (catch-all) still receives gateway AND tool records.""" hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway") - logging.getLogger("gateway.run").info("gateway msg") - logging.getLogger("tools.file_tools").info("file msg") + gw_logger = logging.getLogger("gateway.run") + file_logger = logging.getLogger("tools.file_tools") + # Ensure propagation and levels are clean (cross-test pollution defense) + gw_logger.propagate = True + file_logger.propagate = True + logging.getLogger("tools").propagate = True + file_logger.setLevel(logging.NOTSET) + logging.getLogger("tools").setLevel(logging.NOTSET) + + gw_logger.info("gateway msg") + file_logger.info("file msg") for h in logging.getLogger().handlers: h.flush() diff --git a/tests/test_trajectory_compressor_async.py b/tests/test_trajectory_compressor_async.py index 2b276d03d..1c671471d 100644 --- a/tests/test_trajectory_compressor_async.py +++ b/tests/test_trajectory_compressor_async.py @@ -103,7 +103,7 @@ class TestSourceLineVerification: if "self.async_client = AsyncOpenAI(" in line and "_get_async_client" not in lines[max(0,i-3):i+1]: # Allow it inside _get_async_client method # Check if we're inside _get_async_client by looking at context - context = "\n".join(lines[max(0,i-10):i+1]) + context = "\n".join(lines[max(0,i-20):i+1]) if "_get_async_client" not in context: pytest.fail( f"Line {i}: AsyncOpenAI created eagerly outside _get_async_client()" diff --git a/tests/tools/test_browser_camofox_state.py b/tests/tools/test_browser_camofox_state.py index 33a939f09..475e8c2d0 100644 --- a/tests/tools/test_browser_camofox_state.py +++ b/tests/tools/test_browser_camofox_state.py @@ -64,4 +64,4 @@ class TestCamofoxConfigDefaults: # The current schema version is tracked globally; unrelated default # options may bump it after browser defaults are added. - assert DEFAULT_CONFIG["_config_version"] == 15 + assert DEFAULT_CONFIG["_config_version"] == 17 diff --git a/tests/tools/test_interrupt.py b/tests/tools/test_interrupt.py index dc0ab4599..13b5041d6 100644 --- a/tests/tools/test_interrupt.py +++ b/tests/tools/test_interrupt.py @@ -28,7 +28,7 @@ class TestInterruptModule: assert not is_interrupted() def test_thread_safety(self): - """Set from one thread, check from another.""" + """Set from one thread targeting another thread's ident.""" from tools.interrupt import set_interrupt, is_interrupted set_interrupt(False) @@ -45,11 +45,12 @@ class TestInterruptModule: time.sleep(0.05) assert not seen["value"] - set_interrupt(True) + # Target the checker thread's ident so it sees the interrupt + set_interrupt(True, thread_id=t.ident) t.join(timeout=1) assert seen["value"] - set_interrupt(False) + set_interrupt(False, thread_id=t.ident) # --------------------------------------------------------------------------- @@ -189,10 +190,10 @@ class TestSIGKILLEscalation: t.start() time.sleep(0.5) - set_interrupt(True) + set_interrupt(True, thread_id=t.ident) t.join(timeout=5) - set_interrupt(False) + set_interrupt(False, thread_id=t.ident) assert result_holder["value"] is not None assert result_holder["value"]["returncode"] == 130 diff --git a/tests/tools/test_voice_cli_integration.py b/tests/tools/test_voice_cli_integration.py index 39fa026ce..da500996a 100644 --- a/tests/tools/test_voice_cli_integration.py +++ b/tests/tools/test_voice_cli_integration.py @@ -32,6 +32,7 @@ def _make_voice_cli(**overrides): cli._voice_tts_done.set() cli._pending_input = queue.Queue() cli._app = None + cli._attached_images = [] cli.console = SimpleNamespace(width=80) for k, v in overrides.items(): setattr(cli, k, v) diff --git a/tests/tools/test_zombie_process_cleanup.py b/tests/tools/test_zombie_process_cleanup.py index 9cbbbcd1f..999bc3fe7 100644 --- a/tests/tools/test_zombie_process_cleanup.py +++ b/tests/tools/test_zombie_process_cleanup.py @@ -190,17 +190,38 @@ class TestGatewayCleanupWiring: def test_gateway_stop_calls_close(self): """gateway stop() should call close() on all running agents.""" import asyncio - from unittest.mock import MagicMock, patch + import threading + from unittest.mock import AsyncMock, MagicMock, patch - runner = MagicMock() + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) runner._running = True runner._running_agents = {} + runner._running_agents_ts = {} runner.adapters = {} runner._background_tasks = set() runner._pending_messages = {} runner._pending_approvals = {} + runner._pending_model_notes = {} runner._shutdown_event = asyncio.Event() runner._exit_reason = None + runner._exit_code = None + runner._stop_task = None + runner._draining = False + runner._restart_requested = False + runner._restart_task_started = False + runner._restart_detached = False + runner._restart_via_service = False + runner._restart_drain_timeout = 5.0 + runner._voice_mode = {} + runner._session_model_overrides = {} + runner._update_prompt_pending = {} + runner._busy_input_mode = "interrupt" + runner._agent_cache = {} + runner._agent_cache_lock = threading.Lock() + runner._shutdown_all_gateway_honcho = lambda: None + runner._update_runtime_status = MagicMock() mock_agent_1 = MagicMock() mock_agent_2 = MagicMock() @@ -209,8 +230,6 @@ class TestGatewayCleanupWiring: "session-2": mock_agent_2, } - from gateway.run import GatewayRunner - loop = asyncio.new_event_loop() try: with patch("gateway.status.remove_pid_file"), \ diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index d5c81ad7a..90ecde65a 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -465,7 +465,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr }, "deliver": { "type": "string", - "description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting." + "description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering', 'sms:+15551234567'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting." }, "skills": { "type": "array", diff --git a/tools/voice_mode.py b/tools/voice_mode.py index 2beab4f4f..5dc99070c 100644 --- a/tools/voice_mode.py +++ b/tools/voice_mode.py @@ -429,6 +429,11 @@ class AudioRecorder: """Current audio input RMS level (0-32767). Updated each audio chunk.""" return self._current_rms + @property + def is_recording(self) -> bool: + """Whether audio recording is currently active.""" + return self._recording + # -- public methods ------------------------------------------------------ def _ensure_stream(self) -> None: