diff --git a/tests/gateway/restart_test_helpers.py b/tests/gateway/restart_test_helpers.py
index 54dcd69b9..8b4897467 100644
--- a/tests/gateway/restart_test_helpers.py
+++ b/tests/gateway/restart_test_helpers.py
@@ -35,6 +35,7 @@ def make_restart_source(chat_id: str = "123456", chat_type: str = "dm") -> Sessi
         platform=Platform.TELEGRAM,
         chat_id=chat_id,
         chat_type=chat_type,
+        user_id="u1",
     )
 
 
diff --git a/tests/gateway/test_session_race_guard.py b/tests/gateway/test_session_race_guard.py
index 7a4f6f101..c9e226b67 100644
--- a/tests/gateway/test_session_race_guard.py
+++ b/tests/gateway/test_session_race_guard.py
@@ -60,7 +60,8 @@ def _make_runner():
 
 def _make_event(text="hello", chat_id="12345"):
     source = SessionSource(
-        platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm"
+        platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm",
+        user_id="u1",
     )
     return MessageEvent(text=text, message_type=MessageType.TEXT, source=source)
 
@@ -192,7 +193,8 @@ async def test_command_messages_do_not_leave_sentinel():
     _handle_message.  They must NOT leave a sentinel behind."""
     runner = _make_runner()
     source = SessionSource(
-        platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm"
+        platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm",
+        user_id="u1",
     )
     event = MessageEvent(
         text="/help", message_type=MessageType.TEXT, source=source
@@ -268,7 +270,7 @@ async def test_stop_hard_kills_running_agent():
     forever — showing 'writing...' but never producing output."""
     runner = _make_runner()
     session_key = build_session_key(
-        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
     )
 
     # Simulate a running (possibly hung) agent
@@ -301,7 +303,7 @@ async def test_stop_clears_pending_messages():
     queued during the run must be discarded."""
     runner = _make_runner()
     session_key = build_session_key(
-        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+        SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
     )
 
     fake_agent = MagicMock()
diff --git a/tests/gateway/test_telegram_photo_interrupts.py b/tests/gateway/test_telegram_photo_interrupts.py
index 9235e539d..e808e68db 100644
--- a/tests/gateway/test_telegram_photo_interrupts.py
+++ b/tests/gateway/test_telegram_photo_interrupts.py
@@ -29,7 +29,7 @@ def _make_runner():
 @pytest.mark.asyncio
 async def test_handle_message_does_not_priority_interrupt_photo_followup():
     runner = _make_runner()
-    source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm")
+    source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1")
     session_key = build_session_key(source)
     running_agent = MagicMock()
     runner._running_agents[session_key] = running_agent
diff --git a/tests/run_agent/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py
index 3d7660aa8..00055928e 100644
--- a/tests/run_agent/test_anthropic_error_handling.py
+++ b/tests/run_agent/test_anthropic_error_handling.py
@@ -102,7 +102,19 @@ class _PromptTooLongError(Exception):
         self.status_code = 400
 
 
+class _FakeMessages:
+    """Stub for client.messages.create() / client.messages.stream()."""
+    def create(self, **kwargs):
+        raise NotImplementedError("_FakeAnthropicClient.messages.create should not be called directly in tests")
+
+    def stream(self, **kwargs):
+        raise NotImplementedError("_FakeAnthropicClient.messages.stream should not be called directly in tests")
+
+
 class _FakeAnthropicClient:
+    def __init__(self):
+        self.messages = _FakeMessages()
+
     def close(self):
         pass
 
@@ -131,13 +143,14 @@ def _make_agent_cls(error_cls, recover_after=None):
         def run_conversation(self, user_message, conversation_history=None, task_id=None):
             calls = {"n": 0}
 
-            def _fake_api_call(api_kwargs):
+            def _fake_api_call(api_kwargs, **kw):
                 calls["n"] += 1
                 if recover_after is not None and calls["n"] > recover_after:
                     return _anthropic_response("Recovered")
                 raise error_cls()
 
             self._interruptible_api_call = _fake_api_call
+            self._interruptible_streaming_api_call = _fake_api_call
             return super().run_conversation(
                 user_message, conversation_history=conversation_history, task_id=task_id
             )
@@ -352,10 +365,11 @@ def test_401_refresh_fails_is_non_retryable(monkeypatch):
             return False  # Simulate failed credential refresh
 
         def run_conversation(self, user_message, conversation_history=None, task_id=None):
-            def _fake_api_call(api_kwargs):
+            def _fake_api_call(api_kwargs, **kw):
                 raise _UnauthorizedError()
 
             self._interruptible_api_call = _fake_api_call
+            self._interruptible_streaming_api_call = _fake_api_call
             return super().run_conversation(
                 user_message, conversation_history=conversation_history, task_id=task_id
             )
@@ -436,13 +450,14 @@ def test_prompt_too_long_triggers_compression(monkeypatch):
         def run_conversation(self, user_message, conversation_history=None, task_id=None):
             calls = {"n": 0}
 
-            def _fake_api_call(api_kwargs):
+            def _fake_api_call(api_kwargs, **kw):
                 calls["n"] += 1
                 if calls["n"] == 1:
                     raise _PromptTooLongError()
                 return _anthropic_response("Compressed and recovered")
 
             self._interruptible_api_call = _fake_api_call
+            self._interruptible_streaming_api_call = _fake_api_call
             return super().run_conversation(
                 user_message, conversation_history=conversation_history, task_id=task_id
             )
diff --git a/tests/run_agent/test_context_token_tracking.py b/tests/run_agent/test_context_token_tracking.py
index 377a04a5d..b924448b6 100644
--- a/tests/run_agent/test_context_token_tracking.py
+++ b/tests/run_agent/test_context_token_tracking.py
@@ -56,6 +56,7 @@ def _make_agent(monkeypatch, api_mode, provider, response_fn):
 
         def run_conversation(self, msg, conversation_history=None, task_id=None):
             self._interruptible_api_call = lambda kw: response_fn()
+            self._disable_streaming = True
             return super().run_conversation(msg, conversation_history=conversation_history, task_id=task_id)
 
     return _A(model="test-model", api_key="test-key", provider=provider, api_mode=api_mode)
diff --git a/tests/run_agent/test_dict_tool_call_args.py b/tests/run_agent/test_dict_tool_call_args.py
index e8b4d70fa..61ee6fc5c 100644
--- a/tests/run_agent/test_dict_tool_call_args.py
+++ b/tests/run_agent/test_dict_tool_call_args.py
@@ -66,6 +66,7 @@ def test_tool_call_validation_accepts_dict_arguments(monkeypatch):
         quiet_mode=True,
         skip_memory=True,
     )
+    agent._disable_streaming = True
 
     result = agent.run_conversation("read the file")
 
diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py
index 067ecf672..c0c62b01b 100644
--- a/tests/run_agent/test_provider_parity.py
+++ b/tests/run_agent/test_provider_parity.py
@@ -44,11 +44,11 @@ class _FakeOpenAI:
         pass
 
 
-def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"):
+def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1", model=None):
     monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal"))
     monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {})
     monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI)
-    return AIAgent(
+    kwargs = dict(
         api_key="test-key",
         base_url=base_url,
         provider=provider,
@@ -58,6 +58,9 @@ def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="ht
         skip_context_files=True,
         skip_memory=True,
     )
+    if model:
+        kwargs["model"] = model
+    return AIAgent(**kwargs)
 
 
 # ── _build_api_kwargs tests ─────────────────────────────────────────────────
@@ -247,7 +250,7 @@ class TestBuildApiKwargsChatCompletionsServiceTier:
 
 class TestBuildApiKwargsAIGateway:
     def test_uses_chat_completions_format(self, monkeypatch):
-        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
         assert "messages" in kwargs
@@ -255,7 +258,7 @@ class TestBuildApiKwargsAIGateway:
         assert kwargs["messages"][-1]["content"] == "hi"
 
     def test_no_responses_api_fields(self, monkeypatch):
-        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
         assert "input" not in kwargs
@@ -263,7 +266,7 @@ class TestBuildApiKwargsAIGateway:
         assert "store" not in kwargs
 
     def test_includes_reasoning_in_extra_body(self, monkeypatch):
-        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
         extra = kwargs.get("extra_body", {})
@@ -271,7 +274,7 @@ class TestBuildApiKwargsAIGateway:
         assert extra["reasoning"]["enabled"] is True
 
     def test_includes_tools(self, monkeypatch):
-        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1")
+        agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o")
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
         assert "tools" in kwargs
diff --git a/tests/run_agent/test_real_interrupt_subagent.py b/tests/run_agent/test_real_interrupt_subagent.py
index e0e681cdf..39b4c58e2 100644
--- a/tests/run_agent/test_real_interrupt_subagent.py
+++ b/tests/run_agent/test_real_interrupt_subagent.py
@@ -76,7 +76,8 @@ class TestRealSubagentInterrupt(unittest.TestCase):
         parent._delegate_spinner = None
         parent.tool_progress_callback = None
         parent.iteration_budget = IterationBudget(max_total=100)
-        parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"}
+        parent._client_kwargs = {"api_key": "***", "base_url": "http://localhost:1"}
+        parent._execution_thread_id = None
 
         from tools.delegate_tool import _run_single_child
 
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index 7d0ddd1c8..568077fd7 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -880,6 +880,7 @@ class TestBuildApiKwargs:
         assert kwargs["extra_body"]["reasoning"] == {"enabled": False}
 
     def test_reasoning_not_sent_for_unsupported_openrouter_model(self, agent):
+        agent.base_url = "https://openrouter.ai/api/v1"
         agent.model = "minimax/minimax-m2.5"
         messages = [{"role": "user", "content": "hi"}]
         kwargs = agent._build_api_kwargs(messages)
@@ -1575,6 +1576,7 @@ class TestHandleMaxIterations:
         assert "API down" in result
 
     def test_summary_skips_reasoning_for_unsupported_openrouter_model(self, agent):
+        agent.base_url = "https://openrouter.ai/api/v1"
         agent.model = "minimax/minimax-m2.5"
         resp = _mock_response(content="Summary")
         agent.client.chat.completions.create.return_value = resp
@@ -1705,27 +1707,6 @@ class TestRunConversation:
         assert result["completed"] is True
         assert result["api_calls"] == 2
 
-    def test_inline_think_blocks_reasoning_only_accepted(self, agent):
-        """Inline <think> reasoning-only responses accepted with (empty) content, no retries."""
-        self._setup_agent(agent)
-        empty_resp = _mock_response(
-            content="<think>internal reasoning</think>",
-            finish_reason="stop",
-        )
-        agent.client.chat.completions.create.side_effect = [empty_resp]
-        with (
-            patch.object(agent, "_persist_session"),
-            patch.object(agent, "_save_trajectory"),
-            patch.object(agent, "_cleanup_task_resources"),
-        ):
-            result = agent.run_conversation("answer me")
-        assert result["completed"] is True
-        assert result["final_response"] == "(empty)"
-        assert result["api_calls"] == 1  # no retries
-        # Reasoning should be preserved in the assistant message
-        assistant_msgs = [m for m in result["messages"] if m.get("role") == "assistant"]
-        assert any(m.get("reasoning") for m in assistant_msgs)
-
     def test_reasoning_only_local_resumed_no_compression_triggered(self, agent):
         """Reasoning-only responses no longer trigger compression — prefill then accepted."""
         self._setup_agent(agent)
diff --git a/tests/test_ctx_halving_fix.py b/tests/test_ctx_halving_fix.py
index 1ba423c8f..0dd3ca4e7 100644
--- a/tests/test_ctx_halving_fix.py
+++ b/tests/test_ctx_halving_fix.py
@@ -179,6 +179,7 @@ class TestEphemeralMaxOutputTokens:
             return_value=[{"role": "user", "content": "hi"}]
         )
         agent._anthropic_preserve_dots = MagicMock(return_value=False)
+        agent.request_overrides = {}
         return agent
 
     def test_ephemeral_override_is_used_on_first_call(self):
@@ -253,6 +254,7 @@ class TestContextNotHalvedOnOutputCapError:
         )
         agent._anthropic_preserve_dots = MagicMock(return_value=False)
         agent._vprint = MagicMock()
+        agent.request_overrides = {}
         return agent
 
     def test_output_cap_error_sets_ephemeral_not_context_length(self):
diff --git a/tests/test_hermes_logging.py b/tests/test_hermes_logging.py
index 46969d58d..586a4d666 100644
--- a/tests/test_hermes_logging.py
+++ b/tests/test_hermes_logging.py
@@ -298,8 +298,17 @@ class TestGatewayMode:
         """agent.log (catch-all) still receives gateway AND tool records."""
         hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway")
 
-        logging.getLogger("gateway.run").info("gateway msg")
-        logging.getLogger("tools.file_tools").info("file msg")
+        gw_logger = logging.getLogger("gateway.run")
+        file_logger = logging.getLogger("tools.file_tools")
+        # Ensure propagation and levels are clean (cross-test pollution defense)
+        gw_logger.propagate = True
+        file_logger.propagate = True
+        logging.getLogger("tools").propagate = True
+        file_logger.setLevel(logging.NOTSET)
+        logging.getLogger("tools").setLevel(logging.NOTSET)
+
+        gw_logger.info("gateway msg")
+        file_logger.info("file msg")
 
         for h in logging.getLogger().handlers:
             h.flush()
diff --git a/tests/test_trajectory_compressor_async.py b/tests/test_trajectory_compressor_async.py
index 2b276d03d..1c671471d 100644
--- a/tests/test_trajectory_compressor_async.py
+++ b/tests/test_trajectory_compressor_async.py
@@ -103,7 +103,7 @@ class TestSourceLineVerification:
             if "self.async_client = AsyncOpenAI(" in line and "_get_async_client" not in lines[max(0,i-3):i+1]:
                 # Allow it inside _get_async_client method
                 # Check if we're inside _get_async_client by looking at context
-                context = "\n".join(lines[max(0,i-10):i+1])
+                context = "\n".join(lines[max(0,i-20):i+1])
                 if "_get_async_client" not in context:
                     pytest.fail(
                         f"Line {i}: AsyncOpenAI created eagerly outside _get_async_client()"
diff --git a/tests/tools/test_browser_camofox_state.py b/tests/tools/test_browser_camofox_state.py
index 33a939f09..475e8c2d0 100644
--- a/tests/tools/test_browser_camofox_state.py
+++ b/tests/tools/test_browser_camofox_state.py
@@ -64,4 +64,4 @@ class TestCamofoxConfigDefaults:
 
         # The current schema version is tracked globally; unrelated default
         # options may bump it after browser defaults are added.
-        assert DEFAULT_CONFIG["_config_version"] == 15
+        assert DEFAULT_CONFIG["_config_version"] == 17
diff --git a/tests/tools/test_interrupt.py b/tests/tools/test_interrupt.py
index dc0ab4599..13b5041d6 100644
--- a/tests/tools/test_interrupt.py
+++ b/tests/tools/test_interrupt.py
@@ -28,7 +28,7 @@ class TestInterruptModule:
         assert not is_interrupted()
 
     def test_thread_safety(self):
-        """Set from one thread, check from another."""
+        """Set from one thread targeting another thread's ident."""
         from tools.interrupt import set_interrupt, is_interrupted
         set_interrupt(False)
 
@@ -45,11 +45,12 @@ class TestInterruptModule:
         time.sleep(0.05)
         assert not seen["value"]
 
-        set_interrupt(True)
+        # Target the checker thread's ident so it sees the interrupt
+        set_interrupt(True, thread_id=t.ident)
         t.join(timeout=1)
         assert seen["value"]
 
-        set_interrupt(False)
+        set_interrupt(False, thread_id=t.ident)
 
 
 # ---------------------------------------------------------------------------
@@ -189,10 +190,10 @@ class TestSIGKILLEscalation:
         t.start()
 
         time.sleep(0.5)
-        set_interrupt(True)
+        set_interrupt(True, thread_id=t.ident)
 
         t.join(timeout=5)
-        set_interrupt(False)
+        set_interrupt(False, thread_id=t.ident)
 
         assert result_holder["value"] is not None
         assert result_holder["value"]["returncode"] == 130
diff --git a/tests/tools/test_voice_cli_integration.py b/tests/tools/test_voice_cli_integration.py
index 39fa026ce..da500996a 100644
--- a/tests/tools/test_voice_cli_integration.py
+++ b/tests/tools/test_voice_cli_integration.py
@@ -32,6 +32,7 @@ def _make_voice_cli(**overrides):
     cli._voice_tts_done.set()
     cli._pending_input = queue.Queue()
     cli._app = None
+    cli._attached_images = []
     cli.console = SimpleNamespace(width=80)
     for k, v in overrides.items():
         setattr(cli, k, v)
diff --git a/tests/tools/test_zombie_process_cleanup.py b/tests/tools/test_zombie_process_cleanup.py
index 9cbbbcd1f..999bc3fe7 100644
--- a/tests/tools/test_zombie_process_cleanup.py
+++ b/tests/tools/test_zombie_process_cleanup.py
@@ -190,17 +190,38 @@ class TestGatewayCleanupWiring:
     def test_gateway_stop_calls_close(self):
         """gateway stop() should call close() on all running agents."""
         import asyncio
-        from unittest.mock import MagicMock, patch
+        import threading
+        from unittest.mock import AsyncMock, MagicMock, patch
 
-        runner = MagicMock()
+        from gateway.run import GatewayRunner
+
+        runner = object.__new__(GatewayRunner)
         runner._running = True
         runner._running_agents = {}
+        runner._running_agents_ts = {}
         runner.adapters = {}
         runner._background_tasks = set()
         runner._pending_messages = {}
         runner._pending_approvals = {}
+        runner._pending_model_notes = {}
         runner._shutdown_event = asyncio.Event()
         runner._exit_reason = None
+        runner._exit_code = None
+        runner._stop_task = None
+        runner._draining = False
+        runner._restart_requested = False
+        runner._restart_task_started = False
+        runner._restart_detached = False
+        runner._restart_via_service = False
+        runner._restart_drain_timeout = 5.0
+        runner._voice_mode = {}
+        runner._session_model_overrides = {}
+        runner._update_prompt_pending = {}
+        runner._busy_input_mode = "interrupt"
+        runner._agent_cache = {}
+        runner._agent_cache_lock = threading.Lock()
+        runner._shutdown_all_gateway_honcho = lambda: None
+        runner._update_runtime_status = MagicMock()
 
         mock_agent_1 = MagicMock()
         mock_agent_2 = MagicMock()
@@ -209,8 +230,6 @@ class TestGatewayCleanupWiring:
             "session-2": mock_agent_2,
         }
 
-        from gateway.run import GatewayRunner
-
         loop = asyncio.new_event_loop()
         try:
             with patch("gateway.status.remove_pid_file"), \
diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py
index d5c81ad7a..90ecde65a 100644
--- a/tools/cronjob_tools.py
+++ b/tools/cronjob_tools.py
@@ -465,7 +465,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr
             },
             "deliver": {
                 "type": "string",
-                "description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting."
+                "description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering', 'sms:+15551234567'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting."
             },
             "skills": {
                 "type": "array",
diff --git a/tools/voice_mode.py b/tools/voice_mode.py
index 2beab4f4f..5dc99070c 100644
--- a/tools/voice_mode.py
+++ b/tools/voice_mode.py
@@ -429,6 +429,11 @@ class AudioRecorder:
         """Current audio input RMS level (0-32767). Updated each audio chunk."""
         return self._current_rms
 
+    @property
+    def is_recording(self) -> bool:
+        """Whether audio recording is currently active."""
+        return self._recording
+
     # -- public methods ------------------------------------------------------
 
     def _ensure_stream(self) -> None: