Merge remote-tracking branch 'origin/main' into sid/types-and-lints

# Conflicts:
#	gateway/platforms/base.py
#	gateway/platforms/qqbot/adapter.py
#	gateway/platforms/slack.py
#	hermes_cli/main.py
#	scripts/batch_runner.py
#	tools/skills_tool.py
#	uv.lock
This commit is contained in:
alt-glitch 2026-04-21 20:28:45 +05:30
commit a9ed7cb3b4
117 changed files with 7791 additions and 611 deletions

View file

@ -0,0 +1,170 @@
"""Tests for GHSA-96vc-wcxf-jjff and GHSA-qg5c-hvr5-hjgr.
Two related ACP approval-flow issues:
- 96vc: ACP didn't set HERMES_EXEC_ASK, so `check_all_command_guards`
took the non-interactive auto-approve path and never consulted the
ACP-supplied callback.
- qg5c: `_approval_callback` was a module-global in terminal_tool;
overlapping ACP sessions overwrote each other's callback slot.
Both fixed together by:
1. Setting HERMES_EXEC_ASK inside _run_agent (wraps the agent call).
2. Storing the callback in thread-local state so concurrent executor
threads don't collide.
"""
import os
import threading
from unittest.mock import MagicMock
import pytest
class TestThreadLocalApprovalCallback:
    """GHSA-qg5c-hvr5-hjgr: set_approval_callback must be per-thread so
    concurrent ACP sessions don't stomp on each other's handlers."""

    def test_set_and_get_in_same_thread(self):
        """Within one thread, the getter returns the exact object stored."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )
        cb1 = lambda cmd, desc: "once"  # noqa: E731
        set_approval_callback(cb1)
        # Identity (`is`), not equality — the slot must hold this very object.
        assert _get_approval_callback() is cb1

    def test_callback_not_visible_in_different_thread(self):
        """Thread A's callback is NOT visible to Thread B."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )
        cb_a = lambda cmd, desc: "thread_a"  # noqa: E731
        cb_b = lambda cmd, desc: "thread_b"  # noqa: E731
        seen_in_a = []
        seen_in_b = []

        def thread_a():
            set_approval_callback(cb_a)
            # Pause so thread B has time to set its own callback
            import time
            time.sleep(0.05)
            seen_in_a.append(_get_approval_callback())

        def thread_b():
            set_approval_callback(cb_b)
            import time
            time.sleep(0.05)
            seen_in_b.append(_get_approval_callback())

        ta = threading.Thread(target=thread_a)
        tb = threading.Thread(target=thread_b)
        ta.start()
        tb.start()
        ta.join()
        tb.join()
        # Each thread must see ONLY its own callback — not the other's
        assert seen_in_a == [cb_a]
        assert seen_in_b == [cb_b]

    def test_main_thread_callback_not_leaked_to_worker(self):
        """A callback set in the main thread does NOT leak into a
        freshly-spawned worker thread."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )
        cb_main = lambda cmd, desc: "main"  # noqa: E731
        set_approval_callback(cb_main)
        worker_saw = []

        def worker():
            worker_saw.append(_get_approval_callback())

        t = threading.Thread(target=worker)
        t.start()
        t.join()
        # Worker thread has no callback set — TLS is empty for it
        assert worker_saw == [None]
        # Main thread still has its callback
        assert _get_approval_callback() is cb_main

    def test_sudo_password_callback_also_thread_local(self):
        """Same protection applies to the sudo password callback."""
        from tools.terminal_tool import (
            set_sudo_password_callback,
            _get_sudo_password_callback,
        )
        cb_main = lambda: "main-password"  # noqa: E731
        set_sudo_password_callback(cb_main)
        worker_saw = []

        def worker():
            worker_saw.append(_get_sudo_password_callback())

        t = threading.Thread(target=worker)
        t.start()
        t.join()
        # A fresh worker sees no callback...
        assert worker_saw == [None]
        # ...and the main thread's slot is untouched.
        assert _get_sudo_password_callback() is cb_main
class TestAcpExecAskGate:
    """GHSA-96vc-wcxf-jjff: ACP's _run_agent must set HERMES_INTERACTIVE so
    that tools.approval.check_all_command_guards takes the CLI-interactive
    path (consults the registered callback via prompt_dangerous_approval)
    instead of the non-interactive auto-approve shortcut.
    (HERMES_EXEC_ASK takes the gateway-queue path which requires a
    notify_cb registered in _gateway_notify_cbs not applicable to ACP,
    which uses a direct callback shape.)"""

    def test_interactive_env_var_routes_to_callback(self, monkeypatch):
        """When HERMES_INTERACTIVE is set and an approval callback is
        registered, a dangerous command must route through the callback."""
        # Clean env: clear every mode flag so only HERMES_INTERACTIVE
        # (set further down) influences the routing decision.
        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
        monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
        from tools.approval import check_all_command_guards
        called_with = []

        def fake_cb(command, description, *, allow_permanent=True):
            # Record each consultation so the assertions below can tell
            # whether the guard actually routed through the callback.
            called_with.append((command, description))
            return "once"

        # Without HERMES_INTERACTIVE: takes auto-approve path, callback NOT called
        result = check_all_command_guards(
            "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
        )
        assert result["approved"] is True
        assert called_with == [], (
            "without HERMES_INTERACTIVE the non-interactive auto-approve "
            "path should fire without consulting the callback"
        )
        # With HERMES_INTERACTIVE: callback IS called, approval flows through it
        monkeypatch.setenv("HERMES_INTERACTIVE", "1")
        called_with.clear()
        result = check_all_command_guards(
            "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
        )
        assert called_with, (
            "with HERMES_INTERACTIVE the approval path should consult the "
            "registered callback — this was the ACP bypass in "
            "GHSA-96vc-wcxf-jjff"
        )
        assert result["approved"] is True

View file

@ -73,3 +73,17 @@ class TestApprovalMapping:
result = cb("rm -rf /", "dangerous")
assert result == "deny"
    def test_approval_none_response_returns_deny(self):
        """When request_permission resolves to None, the callback should return 'deny'."""
        # NOTE(review): loop / rp / future shapes are inferred from the specs
        # on the mocks below — confirm against acp_adapter.permissions.
        loop = MagicMock(spec=asyncio.AbstractEventLoop)
        mock_rp = MagicMock(name="request_permission")
        future = MagicMock(spec=Future)
        # Simulate the permission round-trip resolving to None (no answer).
        future.result.return_value = None
        with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
            cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=1.0)
            result = cb("echo hi", "demo")
        # A missing/None response must fail closed.
        assert result == "deny"

View file

@ -95,19 +95,37 @@ class TestInitialize:
class TestAuthenticate:
@pytest.mark.asyncio
async def test_authenticate_with_provider_configured(self, agent, monkeypatch):
async def test_authenticate_with_matching_method_id(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.has_provider",
lambda: True,
"acp_adapter.server.detect_provider",
lambda: "openrouter",
)
resp = await agent.authenticate(method_id="openrouter")
assert isinstance(resp, AuthenticateResponse)
    @pytest.mark.asyncio
    async def test_authenticate_is_case_insensitive(self, agent, monkeypatch):
        """Method-id matching ignores case: "OpenRouter" matches "openrouter"."""
        monkeypatch.setattr(
            "acp_adapter.server.detect_provider",
            lambda: "openrouter",
        )
        resp = await agent.authenticate(method_id="OpenRouter")
        # Mixed-case id still authenticates against the detected provider.
        assert isinstance(resp, AuthenticateResponse)
    @pytest.mark.asyncio
    async def test_authenticate_rejects_mismatched_method_id(self, agent, monkeypatch):
        """A method id that doesn't name the detected provider is rejected."""
        monkeypatch.setattr(
            "acp_adapter.server.detect_provider",
            lambda: "openrouter",
        )
        resp = await agent.authenticate(method_id="totally-invalid-method")
        # Rejection is signalled by returning None, not by raising.
        assert resp is None
@pytest.mark.asyncio
async def test_authenticate_without_provider(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.has_provider",
lambda: False,
"acp_adapter.server.detect_provider",
lambda: None,
)
resp = await agent.authenticate(method_id="openrouter")
assert resp is None
@ -252,6 +270,57 @@ class TestListAndFork:
mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3")
    @pytest.mark.asyncio
    async def test_list_sessions_pagination_first_page(self, agent):
        """More sessions than one page: result is capped at the page size
        and next_cursor points at the last returned session."""
        from acp_adapter import server as acp_server
        # Five more records than fit on a single page.
        infos = [
            {"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
            for i in range(acp_server._LIST_SESSIONS_PAGE_SIZE + 5)
        ]
        with patch.object(agent.session_manager, "list_sessions", return_value=infos):
            resp = await agent.list_sessions()
            assert len(resp.sessions) == acp_server._LIST_SESSIONS_PAGE_SIZE
            assert resp.next_cursor == resp.sessions[-1].session_id
    @pytest.mark.asyncio
    async def test_list_sessions_pagination_no_more(self, agent):
        """Fewer sessions than a page: all returned, no continuation cursor."""
        infos = [
            {"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
            for i in range(3)
        ]
        with patch.object(agent.session_manager, "list_sessions", return_value=infos):
            resp = await agent.list_sessions()
            assert len(resp.sessions) == 3
            assert resp.next_cursor is None
    @pytest.mark.asyncio
    async def test_list_sessions_cursor_resumes_after_match(self, agent):
        """A cursor naming an existing session resumes AFTER that session."""
        infos = [
            {"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
            {"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
            {"session_id": "s3", "cwd": "/tmp", "title": None, "updated_at": 0.0},
        ]
        with patch.object(agent.session_manager, "list_sessions", return_value=infos):
            resp = await agent.list_sessions(cursor="s1")
            # s1 itself is excluded; the rest fit on one page, so no cursor.
            assert [s.session_id for s in resp.sessions] == ["s2", "s3"]
            assert resp.next_cursor is None
    @pytest.mark.asyncio
    async def test_list_sessions_unknown_cursor_returns_empty(self, agent):
        """An unrecognized cursor yields an empty page rather than an error."""
        infos = [
            {"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
            {"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
        ]
        with patch.object(agent.session_manager, "list_sessions", return_value=infos):
            resp = await agent.list_sessions(cursor="does-not-exist")
            assert resp.sessions == []
            assert resp.next_cursor is None
# ---------------------------------------------------------------------------
# session configuration / model routing
# ---------------------------------------------------------------------------

View file

@ -414,7 +414,11 @@ class TestRunOauthSetupToken:
token = run_oauth_setup_token()
assert token == "from-cred-file"
mock_run.assert_called_once()
# Don't assert exact call count — the contract is "credentials flow
# through", not "exactly one subprocess call". xdist cross-test
# pollution (other tests shimming subprocess via plugins) has flaked
# assert_called_once() in CI.
assert mock_run.called
def test_returns_token_from_env_var(self, monkeypatch, tmp_path):
"""Falls back to CLAUDE_CODE_OAUTH_TOKEN env var when no cred files."""

View file

@ -0,0 +1,238 @@
"""Regression tests: normalize_anthropic_response_v2 vs v1.
Constructs mock Anthropic responses and asserts that the v2 function
(returning NormalizedResponse) produces identical field values to the
original v1 function (returning SimpleNamespace + finish_reason).
"""
import json
import pytest
from types import SimpleNamespace
from agent.anthropic_adapter import (
normalize_anthropic_response,
normalize_anthropic_response_v2,
)
from agent.transports.types import NormalizedResponse, ToolCall
# ---------------------------------------------------------------------------
# Helpers to build mock Anthropic SDK responses
# ---------------------------------------------------------------------------
def _text_block(text: str):
return SimpleNamespace(type="text", text=text)
def _thinking_block(thinking: str, signature: str = "sig_abc"):
return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
def _tool_use_block(id: str, name: str, input: dict):
return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
def _response(content_blocks, stop_reason="end_turn"):
return SimpleNamespace(
content=content_blocks,
stop_reason=stop_reason,
usage=SimpleNamespace(
input_tokens=10,
output_tokens=5,
),
)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestTextOnly:
    """Text-only response — no tools, no thinking."""

    def setup_method(self):
        # Run the same fake response through both normalization paths once.
        resp = _response([_text_block("Hello world")])
        self.resp = resp
        v1_pair = normalize_anthropic_response(resp)
        self.v1_msg, self.v1_finish = v1_pair
        self.v2 = normalize_anthropic_response_v2(resp)

    def test_type(self):
        # v2 returns the typed dataclass rather than a SimpleNamespace.
        assert isinstance(self.v2, NormalizedResponse)

    def test_content_matches(self):
        assert self.v2.content == self.v1_msg.content

    def test_finish_reason_matches(self):
        assert self.v2.finish_reason == self.v1_finish

    def test_no_tool_calls(self):
        # Neither version fabricates tool calls for a plain text reply.
        assert self.v2.tool_calls is None
        assert self.v1_msg.tool_calls is None

    def test_no_reasoning(self):
        assert self.v2.reasoning is None
        assert self.v1_msg.reasoning is None
class TestWithToolCalls:
    """Response with tool calls."""

    def setup_method(self):
        # One text block plus two tool_use blocks, finishing on tool_use.
        self.resp = _response(
            [
                _text_block("I'll check that"),
                _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
                _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
            ],
            stop_reason="tool_use",
        )
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_finish_reason(self):
        # Anthropic "tool_use" maps to the OpenAI-style "tool_calls".
        assert self.v2.finish_reason == "tool_calls"
        assert self.v1_finish == "tool_calls"

    def test_tool_call_count(self):
        assert len(self.v2.tool_calls) == 2
        assert len(self.v1_msg.tool_calls) == 2

    def test_tool_call_ids_match(self):
        for i in range(2):
            assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id

    def test_tool_call_names_match(self):
        assert self.v2.tool_calls[0].name == "terminal"
        assert self.v2.tool_calls[1].name == "read_file"
        # v1 nests the name under .function; v2 flattens it onto the call.
        for i in range(2):
            assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name

    def test_tool_call_arguments_match(self):
        for i in range(2):
            assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments

    def test_content_preserved(self):
        # Text blocks survive alongside tool calls in both versions.
        assert self.v2.content == self.v1_msg.content
        assert "check that" in self.v2.content
class TestWithThinking:
    """Response with thinking blocks (Claude 3.5+ extended thinking)."""

    def setup_method(self):
        self.resp = _response([
            _thinking_block("Let me think about this carefully..."),
            _text_block("The answer is 42."),
        ])
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_reasoning_matches(self):
        assert self.v2.reasoning == self.v1_msg.reasoning
        assert "think about this" in self.v2.reasoning

    def test_reasoning_details_in_provider_data(self):
        # v1 exposes reasoning_details directly; v2 tucks them into
        # provider_data["reasoning_details"]. Both must be populated and
        # of the same length.
        v1_details = self.v1_msg.reasoning_details
        v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data else None
        assert v1_details is not None
        assert v2_details is not None
        assert len(v2_details) == len(v1_details)

    def test_content_excludes_thinking(self):
        # Thinking text must not leak into the user-visible content.
        assert self.v2.content == "The answer is 42."
class TestMixed:
    """Response with thinking + text + tool calls."""

    def setup_method(self):
        # All three block kinds at once, finishing on tool_use.
        self.resp = _response(
            [
                _thinking_block("Planning my approach..."),
                _text_block("I'll run the command"),
                _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
            ],
            stop_reason="tool_use",
        )
        self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_all_fields_present(self):
        assert self.v2.content is not None
        assert self.v2.tool_calls is not None
        assert self.v2.reasoning is not None
        assert self.v2.finish_reason == "tool_calls"

    def test_content_matches(self):
        assert self.v2.content == self.v1_msg.content

    def test_reasoning_matches(self):
        assert self.v2.reasoning == self.v1_msg.reasoning

    def test_tool_call_matches(self):
        assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id
        assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name
class TestStopReasons:
    """Verify finish_reason mapping matches between v1 and v2."""

    @pytest.mark.parametrize("stop_reason,expected", [
        ("end_turn", "stop"),
        ("tool_use", "tool_calls"),
        ("max_tokens", "length"),
        ("stop_sequence", "stop"),
        ("refusal", "content_filter"),
        ("model_context_window_exceeded", "length"),
        # Unknown/future stop reasons fall back to plain "stop".
        ("unknown_future_reason", "stop"),
    ])
    def test_stop_reason_mapping(self, stop_reason, expected):
        resp = _response([_text_block("x")], stop_reason=stop_reason)
        v1_msg, v1_finish = normalize_anthropic_response(resp)
        v2 = normalize_anthropic_response_v2(resp)
        # Chained equality: v2, v1, and the expected mapping all agree.
        assert v2.finish_reason == v1_finish == expected
class TestStripToolPrefix:
    """Verify mcp_ prefix stripping works identically."""

    @staticmethod
    def _mcp_response():
        # Shared fixture: a single mcp_-prefixed tool_use block.
        return _response(
            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
            stop_reason="tool_use",
        )

    def test_prefix_stripped(self):
        resp = self._mcp_response()
        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
        # Both paths strip the "mcp_" prefix when asked to.
        assert v1_msg.tool_calls[0].function.name == "terminal"
        assert v2.tool_calls[0].name == "terminal"

    def test_prefix_kept(self):
        resp = self._mcp_response()
        v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
        v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
        # And both leave the name untouched when stripping is off.
        assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
        assert v2.tool_calls[0].name == "mcp_terminal"
class TestEdgeCases:
    """Edge cases: empty content, no blocks, etc."""

    def test_empty_content_blocks(self):
        # With no blocks at all, both versions agree and yield content None.
        resp = _response([])
        v1_msg, v1_finish = normalize_anthropic_response(resp)
        v2 = normalize_anthropic_response_v2(resp)
        assert v2.content == v1_msg.content
        assert v2.content is None

    def test_no_reasoning_details_means_none_provider_data(self):
        # provider_data stays None (not {}) when there is nothing to carry.
        resp = _response([_text_block("hi")])
        v2 = normalize_anthropic_response_v2(resp)
        assert v2.provider_data is None

    def test_v2_returns_dataclass_not_namespace(self):
        resp = _response([_text_block("hi")])
        v2 = normalize_anthropic_response_v2(resp)
        assert isinstance(v2, NormalizedResponse)
        assert not isinstance(v2, SimpleNamespace)

View file

@ -0,0 +1,146 @@
"""Focused regressions for the Copilot ACP shim safety layer."""
from __future__ import annotations
import io
import json
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from agent.copilot_acp_client import CopilotACPClient
class _FakeProcess:
def __init__(self) -> None:
self.stdin = io.StringIO()
class CopilotACPClientSafetyTests(unittest.TestCase):
    """Safety-layer regressions for the Copilot ACP shim: no auto-approved
    permissions, blocked/redacted sensitive reads, and writes confined by
    the denylist and HERMES_WRITE_SAFE_ROOT."""

    def setUp(self) -> None:
        self.client = CopilotACPClient(acp_cwd="/tmp")

    def _dispatch(self, message: dict, *, cwd: str) -> dict:
        """Feed one server->client JSON-RPC message through the handler
        and return the decoded reply written to the fake stdin."""
        process = _FakeProcess()
        handled = self.client._handle_server_message(
            message,
            process=process,
            cwd=cwd,
            text_parts=[],
            reasoning_parts=[],
        )
        self.assertTrue(handled)
        payload = process.stdin.getvalue().strip()
        self.assertTrue(payload)
        return json.loads(payload)

    def test_request_permission_is_not_auto_allowed(self) -> None:
        """A bare permission request must come back 'cancelled', never approved."""
        response = self._dispatch(
            {
                "jsonrpc": "2.0",
                "id": 1,
                "method": "session/request_permission",
                "params": {},
            },
            cwd="/tmp",
        )
        # Double .get("outcome"): result -> outcome envelope -> outcome value.
        outcome = (((response.get("result") or {}).get("outcome") or {}).get("outcome"))
        self.assertEqual(outcome, "cancelled")

    def test_read_text_file_blocks_internal_hermes_hub_files(self) -> None:
        """Reads inside the internal ~/.hermes hub cache are refused outright."""
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            blocked = home / ".hermes" / "skills" / ".hub" / "index-cache" / "entry.json"
            blocked.parent.mkdir(parents=True, exist_ok=True)
            blocked.write_text('{"token":"sk-test-secret-1234567890"}')
            # Point both HOME and HERMES_HOME at the temp tree so the
            # block check resolves against it.
            with patch.dict(
                os.environ,
                {"HOME": str(home), "HERMES_HOME": str(home / ".hermes")},
                clear=False,
            ):
                response = self._dispatch(
                    {
                        "jsonrpc": "2.0",
                        "id": 2,
                        "method": "fs/read_text_file",
                        "params": {"path": str(blocked)},
                    },
                    cwd=str(home),
                )
            self.assertIn("error", response)

    def test_read_text_file_redacts_sensitive_content(self) -> None:
        """Readable files still get secret values scrubbed from the content."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            secret_file = root / "config.env"
            secret_file.write_text("OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012")
            response = self._dispatch(
                {
                    "jsonrpc": "2.0",
                    "id": 3,
                    "method": "fs/read_text_file",
                    "params": {"path": str(secret_file)},
                },
                cwd=str(root),
            )
            content = ((response.get("result") or {}).get("content") or "")
            # The secret value is gone but the key name survives for context.
            self.assertNotIn("abc123def456", content)
            self.assertIn("OPENAI_API_KEY=", content)

    def test_write_text_file_reuses_write_denylist(self) -> None:
        """Writes consult the shared is_write_denied() denylist check."""
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            target = home / ".ssh" / "id_rsa"
            target.parent.mkdir(parents=True, exist_ok=True)
            # NOTE(review): create=True means patch won't fail even if
            # is_write_denied isn't defined on this module — confirm the
            # attribute actually exists there.
            with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True):
                response = self._dispatch(
                    {
                        "jsonrpc": "2.0",
                        "id": 4,
                        "method": "fs/write_text_file",
                        "params": {
                            "path": str(target),
                            "content": "fake-private-key",
                        },
                    },
                    cwd=str(home),
                )
            self.assertIn("error", response)
            # A denied write must leave no file behind.
            self.assertFalse(target.exists())

    def test_write_text_file_respects_safe_root(self) -> None:
        """HERMES_WRITE_SAFE_ROOT confines writes to the configured subtree."""
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            safe_root = root / "workspace"
            safe_root.mkdir()
            outside = root / "outside.txt"
            with patch.dict(os.environ, {"HERMES_WRITE_SAFE_ROOT": str(safe_root)}, clear=False):
                response = self._dispatch(
                    {
                        "jsonrpc": "2.0",
                        "id": 5,
                        "method": "fs/write_text_file",
                        "params": {
                            "path": str(outside),
                            "content": "should-not-write",
                        },
                    },
                    cwd=str(root),
                )
            self.assertIn("error", response)
            self.assertFalse(outside.exists())
if __name__ == "__main__":
unittest.main()

View file

@ -516,13 +516,12 @@ class TestGatewayFormatting:
assert "**" in text # Markdown bold
def test_gateway_format_hides_cost(self, populated_db):
"""Gateway format omits dollar figures and internal cache details."""
engine = InsightsEngine(populated_db)
report = engine.generate(days=30)
text = engine.format_gateway(report)
assert "$" in text
assert "Top Skills" in text
assert "Est. cost" in text
assert "$" not in text
assert "cache" not in text.lower()
def test_gateway_format_shows_models(self, populated_db):

View file

@ -84,38 +84,6 @@ class TestMinimaxAuxModel:
assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]
class TestMinimaxModelCatalog:
"""Verify the model catalog matches official Anthropic-compat endpoint models.
Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
"""
def test_catalog_includes_current_models(self):
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M2.7" in models
assert "MiniMax-M2.5" in models
assert "MiniMax-M2.1" in models
assert "MiniMax-M2" in models
def test_catalog_excludes_m1_family(self):
"""M1 models are not available on the /anthropic endpoint."""
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M1" not in models
def test_catalog_excludes_highspeed(self):
"""Highspeed variants are available but not shown in default catalog
(users can still specify them manually)."""
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M2.7-highspeed" not in models
assert "MiniMax-M2.5-highspeed" not in models
class TestMinimaxBetaHeaders:
"""MiniMax Anthropic-compat endpoints reject fine-grained-tool-streaming beta.

View file

@ -6,6 +6,8 @@ when proxy env vars or custom endpoint URLs are malformed.
"""
from __future__ import annotations
import os
import pytest
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
@ -31,6 +33,12 @@ def test_proxy_env_accepts_empty(monkeypatch):
_validate_proxy_env_urls() # should not raise
def test_proxy_env_normalizes_socks_alias(monkeypatch):
    """A bare ``socks://`` scheme is rewritten in place to ``socks5://``."""
    monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
    _validate_proxy_env_urls()
    # The validator mutates os.environ, not just a return value.
    assert os.environ["ALL_PROXY"] == "socks5://127.0.0.1:1080/"
@pytest.mark.parametrize("key", [
"HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
"http_proxy", "https_proxy", "all_proxy",

View file

@ -405,3 +405,191 @@ class TestPlanSkillHelpers:
assert "Add a /plan command" in msg
assert ".hermes/plans/plan.md" in msg
assert "Runtime note:" in msg
class TestSkillDirectoryHeader:
    """The activation message must expose the absolute skill directory and
    explain how to resolve relative paths, so skills with bundled scripts
    don't force the agent into a second ``skill_view()`` round-trip."""

    def test_header_contains_absolute_skill_dir(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(tmp_path, "abs-dir-skill")
            scan_skill_commands()
            msg = build_skill_invocation_message("/abs-dir-skill", "go")
            assert msg is not None
            assert f"[Skill directory: {skill_dir}]" in msg
            assert "Resolve any relative paths" in msg

    def test_supporting_files_shown_with_absolute_paths(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(tmp_path, "scripted-skill")
            (skill_dir / "scripts").mkdir()
            (skill_dir / "scripts" / "run.js").write_text("console.log('hi')")
            scan_skill_commands()
            msg = build_skill_invocation_message("/scripted-skill")
            assert msg is not None
            # The supporting-files block must emit both the relative form (so the
            # agent can call skill_view on it) and the absolute form (so it can
            # run the script directly via terminal).
            assert "scripts/run.js" in msg
            assert str(skill_dir / "scripts" / "run.js") in msg
            # NOTE(review): this asserts foo.js although the fixture created
            # run.js — presumably the message embeds a generic usage example;
            # confirm against build_skill_invocation_message.
            assert f"node {skill_dir}/scripts/foo.js" in msg
class TestTemplateVarSubstitution:
    """``${HERMES_SKILL_DIR}`` and ``${HERMES_SESSION_ID}`` in SKILL.md body
    are replaced before the agent sees the content."""

    def test_substitutes_skill_dir(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(
                tmp_path,
                "templated",
                body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/templated")
            assert msg is not None
            assert f"node {skill_dir}/scripts/foo.js" in msg
            # The literal template token must not leak through.
            # (Only the body before the directory header is checked, since
            # the header portion may legitimately mention the variable.)
            assert "${HERMES_SKILL_DIR}" not in msg.split("[Skill directory:")[0]

    def test_substitutes_session_id_when_available(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "sess-templated",
                body="Session: ${HERMES_SESSION_ID}",
            )
            scan_skill_commands()
            # task_id supplies the value substituted for the session token.
            msg = build_skill_invocation_message(
                "/sess-templated", task_id="abc-123"
            )
            assert msg is not None
            assert "Session: abc-123" in msg

    def test_leaves_session_id_token_when_missing(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "sess-missing",
                body="Session: ${HERMES_SESSION_ID}",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/sess-missing", task_id=None)
            assert msg is not None
            # No session — token left intact so the author can spot it.
            assert "Session: ${HERMES_SESSION_ID}" in msg

    def test_disable_template_vars_via_config(self, tmp_path):
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": False},
            ),
        ):
            _make_skill(
                tmp_path,
                "no-sub",
                body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/no-sub")
            assert msg is not None
            # Template token must survive when substitution is disabled.
            assert "${HERMES_SKILL_DIR}/scripts/foo.js" in msg
class TestInlineShellExpansion:
    """Inline ``!`cmd`` snippets in SKILL.md run before the agent sees the
    content but only when the user has opted in via config."""

    def test_inline_shell_is_off_by_default(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(
                tmp_path,
                "dyn-default-off",
                body="Today is !`echo INLINE_RAN`.",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-default-off")
            assert msg is not None
            # Default config has inline_shell=False — snippet must stay literal.
            assert "!`echo INLINE_RAN`" in msg
            assert "Today is INLINE_RAN." not in msg

    def test_inline_shell_runs_when_enabled(self, tmp_path):
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": True, "inline_shell": True,
                              "inline_shell_timeout": 5},
            ),
        ):
            _make_skill(
                tmp_path,
                "dyn-on",
                body="Marker: !`echo INLINE_RAN`.",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-on")
            assert msg is not None
            # Snippet replaced by its stdout; the literal form is gone.
            assert "Marker: INLINE_RAN." in msg
            assert "!`echo INLINE_RAN`" not in msg

    def test_inline_shell_runs_in_skill_directory(self, tmp_path):
        """Inline snippets get the skill dir as CWD so relative paths work."""
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": True, "inline_shell": True,
                              "inline_shell_timeout": 5},
            ),
        ):
            skill_dir = _make_skill(
                tmp_path,
                "dyn-cwd",
                body="Here: !`pwd`",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-cwd")
            assert msg is not None
            assert f"Here: {skill_dir}" in msg

    def test_inline_shell_timeout_does_not_break_message(self, tmp_path):
        with (
            patch("tools.skills_tool.SKILLS_DIR", tmp_path),
            patch(
                "agent.skill_commands._load_skills_config",
                return_value={"template_vars": True, "inline_shell": True,
                              "inline_shell_timeout": 1},
            ),
        ):
            _make_skill(
                tmp_path,
                "dyn-slow",
                body="Slow: !`sleep 5 && printf DYN_MARKER`",
            )
            scan_skill_commands()
            msg = build_skill_invocation_message("/dyn-slow")
            assert msg is not None
            # Timeout is surfaced as a marker instead of propagating as an error,
            # and the rest of the skill message still renders.
            assert "inline-shell timeout" in msg
            # The command's intended stdout never made it through — only the
            # timeout marker (which echoes the command text) survives.
            assert "DYN_MARKER" not in msg.replace("sleep 5 && printf DYN_MARKER", "")

View file

View file

@ -0,0 +1,220 @@
"""Tests for the transport ABC, registry, and AnthropicTransport."""
import pytest
from types import SimpleNamespace
from unittest.mock import MagicMock
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
from agent.transports import get_transport, register_transport, _REGISTRY
# ── ABC contract tests ──────────────────────────────────────────────────
class TestProviderTransportABC:
    """Verify the ABC contract is enforceable."""

    def test_cannot_instantiate_abc(self):
        # The abstract base itself must refuse direct instantiation.
        with pytest.raises(TypeError):
            ProviderTransport()

    def test_concrete_must_implement_all_abstract(self):
        # Implementing only api_mode is not enough — other abstract
        # members are still missing, so instantiation fails.
        class Incomplete(ProviderTransport):
            @property
            def api_mode(self):
                return "test"
        with pytest.raises(TypeError):
            Incomplete()

    def test_minimal_concrete(self):
        """A class overriding all abstract members instantiates and
        inherits the base-class defaults for the optional hooks."""
        class Minimal(ProviderTransport):
            @property
            def api_mode(self):
                return "test_minimal"
            def convert_messages(self, messages, **kw):
                return messages
            def convert_tools(self, tools):
                return tools
            def build_kwargs(self, model, messages, tools=None, **params):
                return {"model": model, "messages": messages}
            def normalize_response(self, response, **kw):
                return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop")
        t = Minimal()
        assert t.api_mode == "test_minimal"
        assert t.validate_response(None) is True  # default
        assert t.extract_cache_stats(None) is None  # default
        assert t.map_finish_reason("end_turn") == "end_turn"  # default passthrough
# ── Registry tests ───────────────────────────────────────────────────────
class TestTransportRegistry:
    """get_transport / register_transport lookup behavior."""

    def test_get_unregistered_returns_none(self):
        # Unknown mode yields None, not an exception.
        assert get_transport("nonexistent_mode") is None

    def test_anthropic_registered_on_import(self):
        # Importing the module has the side effect of self-registering.
        import agent.transports.anthropic  # noqa: F401
        t = get_transport("anthropic_messages")
        assert t is not None
        assert t.api_mode == "anthropic_messages"

    def test_register_and_get(self):
        class DummyTransport(ProviderTransport):
            @property
            def api_mode(self):
                return "dummy_test"
            def convert_messages(self, messages, **kw):
                return messages
            def convert_tools(self, tools):
                return tools
            def build_kwargs(self, model, messages, tools=None, **params):
                return {}
            def normalize_response(self, response, **kw):
                return NormalizedResponse(content=None, tool_calls=None, finish_reason="stop")
        register_transport("dummy_test", DummyTransport)
        t = get_transport("dummy_test")
        assert t.api_mode == "dummy_test"
        # Cleanup so the module-global registry doesn't leak into other tests.
        _REGISTRY.pop("dummy_test", None)
# ── AnthropicTransport tests ────────────────────────────────────────────
class TestAnthropicTransport:
    """Contract tests for the Anthropic Messages transport: tool-schema
    conversion, response validation, finish-reason mapping, cache-stat
    extraction, and normalization of text / tool-use / thinking responses."""

    @pytest.fixture
    def transport(self):
        # Imported for its side effect: the module registers itself in the
        # transport registry at import time.
        import agent.transports.anthropic  # noqa: F401
        return get_transport("anthropic_messages")

    def test_api_mode(self, transport):
        assert transport.api_mode == "anthropic_messages"

    def test_convert_tools_simple(self, transport):
        # OpenAI-style function tools become Anthropic's flat shape:
        # `name` at the top level and an `input_schema` key.
        tools = [{
            "type": "function",
            "function": {
                "name": "test_tool",
                "description": "A test",
                "parameters": {"type": "object", "properties": {}},
            }
        }]
        result = transport.convert_tools(tools)
        assert len(result) == 1
        assert result[0]["name"] == "test_tool"
        assert "input_schema" in result[0]

    def test_validate_response_none(self, transport):
        assert transport.validate_response(None) is False

    def test_validate_response_empty_content(self, transport):
        # A response with an empty content list is not usable.
        r = SimpleNamespace(content=[])
        assert transport.validate_response(r) is False

    def test_validate_response_valid(self, transport):
        r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")])
        assert transport.validate_response(r) is True

    def test_map_finish_reason(self, transport):
        # Anthropic stop_reason values map onto canonical finish reasons;
        # anything unrecognized falls back to "stop".
        assert transport.map_finish_reason("end_turn") == "stop"
        assert transport.map_finish_reason("tool_use") == "tool_calls"
        assert transport.map_finish_reason("max_tokens") == "length"
        assert transport.map_finish_reason("stop_sequence") == "stop"
        assert transport.map_finish_reason("refusal") == "content_filter"
        assert transport.map_finish_reason("model_context_window_exceeded") == "length"
        assert transport.map_finish_reason("unknown") == "stop"

    def test_extract_cache_stats_none_usage(self, transport):
        r = SimpleNamespace(usage=None)
        assert transport.extract_cache_stats(r) is None

    def test_extract_cache_stats_with_cache(self, transport):
        usage = SimpleNamespace(cache_read_input_tokens=100, cache_creation_input_tokens=50)
        r = SimpleNamespace(usage=usage)
        result = transport.extract_cache_stats(r)
        assert result == {"cached_tokens": 100, "creation_tokens": 50}

    def test_extract_cache_stats_zero(self, transport):
        # All-zero cache counters collapse to None rather than an empty dict.
        usage = SimpleNamespace(cache_read_input_tokens=0, cache_creation_input_tokens=0)
        r = SimpleNamespace(usage=usage)
        assert transport.extract_cache_stats(r) is None

    def test_normalize_response_text(self, transport):
        """Test normalization of a simple text response."""
        r = SimpleNamespace(
            content=[SimpleNamespace(type="text", text="Hello world")],
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=5),
            model="claude-sonnet-4-6",
        )
        nr = transport.normalize_response(r)
        assert isinstance(nr, NormalizedResponse)
        assert nr.content == "Hello world"
        # Either representation of "no tool calls" is acceptable.
        assert nr.tool_calls is None or nr.tool_calls == []
        assert nr.finish_reason == "stop"

    def test_normalize_response_tool_calls(self, transport):
        """Test normalization of a tool-use response."""
        r = SimpleNamespace(
            content=[
                SimpleNamespace(
                    type="tool_use",
                    id="toolu_123",
                    name="terminal",
                    input={"command": "ls"},
                ),
            ],
            stop_reason="tool_use",
            usage=SimpleNamespace(input_tokens=10, output_tokens=20),
            model="claude-sonnet-4-6",
        )
        nr = transport.normalize_response(r)
        assert nr.finish_reason == "tool_calls"
        assert len(nr.tool_calls) == 1
        tc = nr.tool_calls[0]
        assert tc.name == "terminal"
        assert tc.id == "toolu_123"
        # The dict input ends up as a JSON-style string (quoted keys).
        assert '"command"' in tc.arguments

    def test_normalize_response_thinking(self, transport):
        """Test normalization preserves thinking content."""
        r = SimpleNamespace(
            content=[
                SimpleNamespace(type="thinking", thinking="Let me think..."),
                SimpleNamespace(type="text", text="The answer is 42"),
            ],
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=15),
            model="claude-sonnet-4-6",
        )
        nr = transport.normalize_response(r)
        # Thinking blocks land in `reasoning`; text blocks in `content`.
        assert nr.content == "The answer is 42"
        assert nr.reasoning == "Let me think..."

    def test_build_kwargs_returns_dict(self, transport):
        """Test build_kwargs produces a usable kwargs dict."""
        messages = [{"role": "user", "content": "Hello"}]
        kw = transport.build_kwargs(
            model="claude-sonnet-4-6",
            messages=messages,
            max_tokens=1024,
        )
        assert isinstance(kw, dict)
        assert "model" in kw
        assert "max_tokens" in kw
        assert "messages" in kw

    def test_convert_messages_extracts_system(self, transport):
        """Test convert_messages separates system from messages."""
        messages = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hi"},
        ]
        system, msgs = transport.convert_messages(messages)
        # System should be extracted out of the message list
        assert system is not None
        # Messages should only have user
        assert len(msgs) >= 1

View file

@ -0,0 +1,151 @@
"""Tests for agent/transports/types.py — dataclass construction + helpers."""
import json
import pytest
from agent.transports.types import (
NormalizedResponse,
ToolCall,
Usage,
build_tool_call,
map_finish_reason,
)
# ---------------------------------------------------------------------------
# ToolCall
# ---------------------------------------------------------------------------
class TestToolCall:
    """ToolCall is a plain data holder; verify field wiring and defaults."""

    def test_basic_construction(self):
        call = ToolCall(id="call_abc", name="terminal", arguments='{"cmd": "ls"}')
        assert (call.id, call.name) == ("call_abc", "terminal")
        assert call.arguments == '{"cmd": "ls"}'
        # provider_data is optional and defaults to None.
        assert call.provider_data is None

    def test_none_id(self):
        # Some providers omit call ids; None must be representable.
        call = ToolCall(id=None, name="read_file", arguments="{}")
        assert call.id is None

    def test_provider_data(self):
        call = ToolCall(
            id="call_x",
            name="t",
            arguments="{}",
            provider_data={"call_id": "call_x", "response_item_id": "fc_x"},
        )
        assert call.provider_data["call_id"] == "call_x"
        assert call.provider_data["response_item_id"] == "fc_x"
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
class TestUsage:
    """Usage counters default to zero and accept explicit values."""

    def test_defaults(self):
        usage = Usage()
        assert (usage.prompt_tokens, usage.completion_tokens) == (0, 0)
        assert (usage.total_tokens, usage.cached_tokens) == (0, 0)

    def test_explicit(self):
        usage = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150, cached_tokens=80)
        assert usage.total_tokens == 150
# ---------------------------------------------------------------------------
# NormalizedResponse
# ---------------------------------------------------------------------------
class TestNormalizedResponse:
    """NormalizedResponse carries content/tool-calls plus optional extras."""

    def test_text_only(self):
        resp = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop")
        assert resp.content == "hello"
        assert resp.tool_calls is None
        assert resp.finish_reason == "stop"
        # Every optional field defaults to None.
        assert resp.reasoning is None
        assert resp.usage is None
        assert resp.provider_data is None

    def test_with_tool_calls(self):
        calls = [ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')]
        resp = NormalizedResponse(content=None, tool_calls=calls, finish_reason="tool_calls")
        assert resp.finish_reason == "tool_calls"
        assert len(resp.tool_calls) == 1
        assert resp.tool_calls[0].name == "terminal"

    def test_with_reasoning(self):
        resp = NormalizedResponse(
            content="answer",
            tool_calls=None,
            finish_reason="stop",
            reasoning="I thought about it",
        )
        assert resp.reasoning == "I thought about it"

    def test_with_provider_data(self):
        resp = NormalizedResponse(
            content=None,
            tool_calls=None,
            finish_reason="stop",
            provider_data={"reasoning_details": [{"type": "thinking", "thinking": "hmm"}]},
        )
        assert resp.provider_data["reasoning_details"][0]["type"] == "thinking"
# ---------------------------------------------------------------------------
# build_tool_call
# ---------------------------------------------------------------------------
class TestBuildToolCall:
    """build_tool_call normalizes arguments and provider bookkeeping."""

    def test_dict_arguments_serialized(self):
        # Dict arguments are JSON-encoded into the canonical string form.
        call = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"})
        assert call.arguments == json.dumps({"cmd": "ls"})
        assert call.provider_data is None

    def test_string_arguments_passthrough(self):
        # Already-serialized arguments are left untouched.
        call = build_tool_call(id="call_2", name="read_file", arguments='{"path": "/tmp"}')
        assert call.arguments == '{"path": "/tmp"}'

    def test_provider_fields(self):
        call = build_tool_call(
            id="call_3",
            name="terminal",
            arguments="{}",
            call_id="call_3",
            response_item_id="fc_3",
        )
        assert call.provider_data == {"call_id": "call_3", "response_item_id": "fc_3"}

    def test_none_id(self):
        call = build_tool_call(id=None, name="t", arguments="{}")
        assert call.id is None
# ---------------------------------------------------------------------------
# map_finish_reason
# ---------------------------------------------------------------------------
class TestMapFinishReason:
    """map_finish_reason translates provider stop reasons via a lookup table."""

    # Anthropic-style stop_reason values and their canonical equivalents.
    ANTHROPIC_MAP = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
        "refusal": "content_filter",
    }

    def test_known_reason(self):
        expectations = {
            "end_turn": "stop",
            "tool_use": "tool_calls",
            "max_tokens": "length",
            "refusal": "content_filter",
        }
        for reason, expected in expectations.items():
            assert map_finish_reason(reason, self.ANTHROPIC_MAP) == expected

    def test_unknown_reason_defaults_to_stop(self):
        # Unlisted reasons fall back to the safe default.
        assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop"

    def test_none_reason(self):
        assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"

View file

@ -0,0 +1,146 @@
"""Regression tests for classic-CLI mid-run /steer dispatch.
Background
----------
/steer sent while the agent is running used to be queued through
``self._pending_input`` alongside ordinary user input. ``process_loop``
pulls from that queue and calls ``process_command()`` but while the
agent is running, ``process_loop`` is blocked inside ``self.chat()``.
By the time the queued /steer was pulled, ``_agent_running`` had
already flipped back to False, so ``process_command()`` took the idle
fallback (``"No agent running; queued as next turn"``) and delivered
the steer as an ordinary next-turn message.
The fix dispatches /steer inline on the UI thread when the agent is
running — matching the existing pattern for /model — so the steer
reaches ``agent.steer()`` (thread-safe) without touching the queue.
These tests exercise the detector + inline dispatch without starting a
prompt_toolkit app.
"""
from __future__ import annotations
import importlib
import sys
from unittest.mock import MagicMock, patch
def _make_cli():
    """Create a HermesCLI instance with prompt_toolkit stubbed out.

    The patch ordering matters: the prompt_toolkit stubs and cleaned env
    must be in place BEFORE ``cli`` is reloaded, so that the module-level
    imports inside ``cli`` bind to the MagicMock stand-ins instead of the
    real package, and so config-affecting env vars don't leak in.
    """
    # Minimal config dict mirroring the shape cli.CLI_CONFIG expects.
    _clean_config = {
        "model": {
            "default": "anthropic/claude-opus-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "provider": "auto",
        },
        "display": {"compact": False, "tool_progress": "all"},
        "agent": {},
        "terminal": {"env_type": "local"},
    }
    # Blanked (not deleted) so patch.dict restores originals on exit.
    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
    # Every prompt_toolkit submodule that `cli` imports must be stubbed,
    # or the reload below would pull in the real package.
    prompt_toolkit_stubs = {
        "prompt_toolkit": MagicMock(),
        "prompt_toolkit.history": MagicMock(),
        "prompt_toolkit.styles": MagicMock(),
        "prompt_toolkit.patch_stdout": MagicMock(),
        "prompt_toolkit.application": MagicMock(),
        "prompt_toolkit.layout": MagicMock(),
        "prompt_toolkit.layout.processors": MagicMock(),
        "prompt_toolkit.filters": MagicMock(),
        "prompt_toolkit.layout.dimension": MagicMock(),
        "prompt_toolkit.layout.menus": MagicMock(),
        "prompt_toolkit.widgets": MagicMock(),
        "prompt_toolkit.key_binding": MagicMock(),
        "prompt_toolkit.completion": MagicMock(),
        "prompt_toolkit.formatted_text": MagicMock(),
        "prompt_toolkit.auto_suggest": MagicMock(),
    }
    with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict(
        "os.environ", clean_env, clear=False
    ):
        import cli as _cli_mod

        # Reload so cli's module-level lookups re-resolve against the stubs.
        _cli_mod = importlib.reload(_cli_mod)
        with patch.object(_cli_mod, "get_tool_definitions", return_value=[]), patch.dict(
            _cli_mod.__dict__, {"CLI_CONFIG": _clean_config}
        ):
            # Construct while all patches are active; they unwind after return.
            return _cli_mod.HermesCLI()
class TestSteerInlineDetector:
    """_should_handle_steer_command_inline gates the busy-path fast dispatch."""

    def test_detects_steer_when_agent_running(self):
        cli = _make_cli()
        cli._agent_running = True
        assert cli._should_handle_steer_command_inline("/steer focus on error handling") is True

    def test_ignores_steer_when_agent_idle(self):
        """Idle-path /steer should fall through to the normal process_loop
        dispatch so the queue-style fallback message is emitted."""
        cli = _make_cli()
        cli._agent_running = False
        assert cli._should_handle_steer_command_inline("/steer do something") is False

    def test_ignores_non_slash_input(self):
        cli = _make_cli()
        cli._agent_running = True
        for text in ("steer without slash", ""):
            assert cli._should_handle_steer_command_inline(text) is False

    def test_ignores_other_slash_commands(self):
        cli = _make_cli()
        cli._agent_running = True
        for command in ("/queue hello", "/stop", "/help"):
            assert cli._should_handle_steer_command_inline(command) is False

    def test_ignores_steer_with_attached_images(self):
        """Image payloads take the normal path; steer doesn't accept images."""
        cli = _make_cli()
        cli._agent_running = True
        assert cli._should_handle_steer_command_inline("/steer text", has_images=True) is False
class TestSteerBusyPathDispatch:
    """When the detector fires, process_command('/steer ...') must call
    agent.steer() directly rather than the idle-path fallback."""

    def _cli_with_mock_agent(self, running):
        # Shared arrangement: CLI with a mocked agent and an observable queue.
        cli = _make_cli()
        cli._agent_running = running
        cli.agent = MagicMock()
        cli.agent.steer = MagicMock(return_value=True)
        # Make sure the idle-path fallback would be observable if taken.
        cli._pending_input = MagicMock()
        return cli

    def test_process_command_routes_to_agent_steer(self):
        """With _agent_running=True and agent.steer present, /steer reaches
        agent.steer(payload), NOT _pending_input."""
        cli = self._cli_with_mock_agent(running=True)
        cli.process_command("/steer focus on errors")
        cli.agent.steer.assert_called_once_with("focus on errors")
        cli._pending_input.put.assert_not_called()

    def test_idle_path_queues_as_next_turn(self):
        """Control — when the agent is NOT running, /steer correctly falls
        back to next-turn queue semantics. Demonstrates why the fix was
        needed: the queue path only works when you can actually drain it."""
        cli = self._cli_with_mock_agent(running=False)
        cli.process_command("/steer would-be-next-turn")
        # Idle path does NOT call agent.steer
        cli.agent.steer.assert_not_called()
        # It puts the payload in the queue as a normal next-turn message
        cli._pending_input.put.assert_called_once_with("would-be-next-turn")
# Allow running this module directly (python <file> -v) outside a pytest run.
if __name__ == "__main__":  # pragma: no cover
    import pytest

    pytest.main([__file__, "-v"])

View file

@ -186,6 +186,31 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_HOME_MODE",
"BROWSER_CDP_URL",
"CAMOFOX_URL",
# Platform allowlists — not credentials, but if set from any source
# (user shell, earlier leaky test, CI env), they change gateway auth
# behavior and flake button-authorization tests.
"TELEGRAM_ALLOWED_USERS",
"DISCORD_ALLOWED_USERS",
"WHATSAPP_ALLOWED_USERS",
"SLACK_ALLOWED_USERS",
"SIGNAL_ALLOWED_USERS",
"SIGNAL_GROUP_ALLOWED_USERS",
"EMAIL_ALLOWED_USERS",
"SMS_ALLOWED_USERS",
"MATTERMOST_ALLOWED_USERS",
"MATRIX_ALLOWED_USERS",
"DINGTALK_ALLOWED_USERS",
"FEISHU_ALLOWED_USERS",
"WECOM_ALLOWED_USERS",
"GATEWAY_ALLOWED_USERS",
"GATEWAY_ALLOW_ALL_USERS",
"TELEGRAM_ALLOW_ALL_USERS",
"DISCORD_ALLOW_ALL_USERS",
"WHATSAPP_ALLOW_ALL_USERS",
"SLACK_ALLOW_ALL_USERS",
"SIGNAL_ALLOW_ALL_USERS",
"EMAIL_ALLOW_ALL_USERS",
"SMS_ALLOW_ALL_USERS",
})
@ -258,6 +283,107 @@ def _isolate_hermes_home(_hermetic_environment):
return None
# ── Module-level state reset ───────────────────────────────────────────────
#
# Python modules are singletons per process, and pytest-xdist workers are
# long-lived. Module-level dicts/sets (tool registries, approval state,
# interrupt flags) and ContextVars persist across tests in the same worker,
# causing tests that pass alone to fail when run with siblings.
#
# Each entry in this fixture clears state that belongs to a specific module.
# New state buckets go here too — this is the single gate that prevents
# "works alone, flakes in CI" bugs from state leakage.
#
# The skill `test-suite-cascade-diagnosis` documents the concrete patterns
# this closes; the running example was `test_command_guards` failing 12/15
# CI runs because ``tools.approval._session_approved`` carried approvals
# from one test's session into another's.
@pytest.fixture(autouse=True)
def _reset_module_state():
    """Clear module-level mutable state and ContextVars between tests.

    Keeps state from leaking across tests on the same xdist worker. Modules
    that don't exist yet (test collection before production import) are
    skipped silently — production import later creates fresh empty state.
    Resets run BEFORE each test (everything precedes the ``yield``).
    """
    # --- tools.approval — the single biggest source of cross-test pollution ---
    try:
        from tools import approval as _approval_mod
        _approval_mod._session_approved.clear()
        _approval_mod._session_yolo.clear()
        _approval_mod._permanent_approved.clear()
        _approval_mod._pending.clear()
        _approval_mod._gateway_queues.clear()
        _approval_mod._gateway_notify_cbs.clear()
        # ContextVar: reset to empty string so get_current_session_key()
        # falls through to the env var / default path, matching a fresh
        # process.
        _approval_mod._approval_session_key.set("")
    except Exception:
        # Module not imported yet (or attrs renamed) — nothing to reset.
        pass
    # --- tools.interrupt — per-thread interrupt flag set ---
    try:
        from tools import interrupt as _interrupt_mod
        with _interrupt_mod._lock:
            _interrupt_mod._interrupted_threads.clear()
    except Exception:
        pass
    # --- gateway.session_context — ContextVars that represent
    # the active gateway session. If set in one test and not reset,
    # the next test's get_session_env() reads stale values.
    try:
        from gateway import session_context as _sc_mod
        for _cv in (
            _sc_mod._SESSION_PLATFORM,
            _sc_mod._SESSION_CHAT_ID,
            _sc_mod._SESSION_CHAT_NAME,
            _sc_mod._SESSION_THREAD_ID,
            _sc_mod._SESSION_USER_ID,
            _sc_mod._SESSION_USER_NAME,
            _sc_mod._SESSION_KEY,
            _sc_mod._CRON_AUTO_DELIVER_PLATFORM,
            _sc_mod._CRON_AUTO_DELIVER_CHAT_ID,
            _sc_mod._CRON_AUTO_DELIVER_THREAD_ID,
        ):
            _cv.set(_sc_mod._UNSET)
    except Exception:
        pass
    # --- tools.env_passthrough — ContextVar<set[str]> with no default ---
    # LookupError is normal if the test never set it. Setting it to an
    # empty set unconditionally normalizes the starting state.
    try:
        from tools import env_passthrough as _envp_mod
        _envp_mod._allowed_env_vars_var.set(set())
    except Exception:
        pass
    # --- tools.credential_files — ContextVar<dict> ---
    try:
        from tools import credential_files as _credf_mod
        _credf_mod._registered_files_var.set({})
    except Exception:
        pass
    # --- tools.file_tools — per-task read history + file-ops cache ---
    # _read_tracker accumulates per-task_id read history for loop detection,
    # capped by _READ_HISTORY_CAP. If entries from a prior test persist, the
    # cap is hit faster than expected and capacity-related tests flake.
    try:
        from tools import file_tools as _ft_mod
        with _ft_mod._read_tracker_lock:
            _ft_mod._read_tracker.clear()
        with _ft_mod._file_ops_lock:
            _ft_mod._file_ops_cache.clear()
    except Exception:
        pass
    yield
@pytest.fixture()
def tmp_dir(tmp_path):
"""Provide a temporary directory that is cleaned up automatically."""

View file

@ -1580,3 +1580,128 @@ class TestParallelTick:
end_s1 = [t for action, jid, t in call_times if action == "end" and jid == "s1"][0]
start_s2 = [t for action, jid, t in call_times if action == "start" and jid == "s2"][0]
assert start_s2 >= end_s1, "Jobs ran concurrently despite max_parallel=1"
class TestDeliverResultTimeoutCancelsFuture:
    """When future.result(timeout=60) raises TimeoutError in the live
    adapter delivery path, _deliver_result must cancel the orphan
    coroutine so it cannot duplicate-send after the standalone fallback.
    """

    def test_live_adapter_timeout_cancels_future_and_falls_back(self):
        """End-to-end: the live-adapter wait "times out" (future.result() is
        overridden to raise TimeoutError), _deliver_result must cancel the
        orphan future, and the standalone fallback path must still deliver."""
        from gateway.config import Platform
        from concurrent.futures import Future

        # Live adapter whose send() coroutine never resolves within the budget
        adapter = AsyncMock()
        adapter.send.return_value = MagicMock(success=True)
        pconfig = MagicMock()
        pconfig.enabled = True
        mock_cfg = MagicMock()
        mock_cfg.platforms = {Platform.TELEGRAM: pconfig}
        loop = MagicMock()
        loop.is_running.return_value = True
        # A real concurrent.futures.Future so .cancel() has real semantics,
        # but we override .result() to raise TimeoutError exactly like the
        # 60s wait firing in production.
        captured_future = Future()
        cancel_calls = []
        original_cancel = captured_future.cancel

        def tracking_cancel():
            # Record the call, then delegate to the real cancel().
            cancel_calls.append(True)
            return original_cancel()

        captured_future.cancel = tracking_cancel
        captured_future.result = MagicMock(side_effect=TimeoutError("timed out"))

        def fake_run_coro(coro, _loop):
            # Close the coroutine to avoid "never awaited" warnings, then
            # hand back the instrumented future.
            coro.close()
            return captured_future

        job = {
            "id": "timeout-job",
            "deliver": "origin",
            "origin": {"platform": "telegram", "chat_id": "123"},
        }
        standalone_send = AsyncMock(return_value={"success": True})
        with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
             patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \
             patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro), \
             patch("tools.send_message_tool._send_to_platform", new=standalone_send):
            result = _deliver_result(
                job,
                "Hello world",
                adapters={Platform.TELEGRAM: adapter},
                loop=loop,
            )
        # 1. The orphan future was cancelled on timeout (the bug fix)
        assert cancel_calls == [True], "future.cancel() must fire on TimeoutError"
        # 2. The standalone fallback delivered — no double send, no silent drop
        assert result is None, f"expected successful delivery, got error: {result!r}"
        standalone_send.assert_awaited_once()
class TestSendMediaTimeoutCancelsFuture:
    """Same orphan-coroutine guarantee for _send_media_via_adapter's
    future.result(timeout=30) call. If this times out mid-batch, the
    in-flight coroutine must be cancelled before the next file is tried.
    """

    def test_media_send_timeout_cancels_future_and_continues(self):
        """End-to-end: _send_media_via_adapter with a future whose .result()
        raises TimeoutError. Assert cancel() fires and the loop proceeds
        to the next file rather than hanging or crashing."""
        from concurrent.futures import Future

        adapter = MagicMock()
        adapter.send_image_file = AsyncMock()
        adapter.send_video = AsyncMock()
        # First file: future that times out. Second file: future that resolves OK.
        timeout_future = Future()
        timeout_cancel_calls = []
        original_cancel = timeout_future.cancel

        def tracking_cancel():
            # Record the call, then delegate to the real cancel().
            timeout_cancel_calls.append(True)
            return original_cancel()

        timeout_future.cancel = tracking_cancel
        timeout_future.result = MagicMock(side_effect=TimeoutError("timed out"))
        ok_future = Future()
        ok_future.set_result(MagicMock(success=True))
        # Each run_coroutine_threadsafe call consumes the next scripted future.
        futures_iter = iter([timeout_future, ok_future])

        def fake_run_coro(coro, _loop):
            coro.close()
            return next(futures_iter)

        media_files = [
            ("/tmp/slow.png", False),  # times out
            ("/tmp/fast.mp4", False),  # succeeds
        ]
        loop = MagicMock()
        job = {"id": "media-timeout"}
        with patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro):
            # Should not raise — the except Exception clause swallows the timeout
            _send_media_via_adapter(adapter, "chat-1", media_files, None, loop, job)
        # 1. The timed-out future was cancelled (the bug fix)
        assert timeout_cancel_calls == [True], "future.cancel() must fire on TimeoutError"
        # 2. Second file still got dispatched — one timeout doesn't abort the batch
        adapter.send_video.assert_called_once()
        assert adapter.send_video.call_args[1]["video_path"] == "/tmp/fast.mp4"

View file

@ -355,8 +355,17 @@ async def test_none_user_id_does_not_generate_pairing_code(monkeypatch, tmp_path
async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path):
"""Verify the normal (non-internal) path still triggers pairing for unknown users."""
import gateway.run as gateway_run
import gateway.pairing as pairing_mod
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
# gateway.pairing.PAIRING_DIR is a module-level constant captured at
# import time from whichever HERMES_HOME was set then. Per-test
# HERMES_HOME redirection in conftest doesn't retroactively move it.
# Override directly so pairing rate-limit state lives in this test's
# tmp_path (and so stale state from prior xdist workers can't leak in).
pairing_dir = tmp_path / "pairing"
pairing_dir.mkdir()
monkeypatch.setattr(pairing_mod, "PAIRING_DIR", pairing_dir)
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
# Clear env vars that could let all users through (loaded by

View file

@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import Platform, StreamingConfig
from gateway.platforms.base import resolve_proxy_url
from gateway.run import GatewayRunner
from gateway.session import SessionSource
@ -133,6 +134,15 @@ class TestGetProxyUrl:
assert runner._get_proxy_url() is None
class TestResolveProxyUrl:
    def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch):
        """`socks://` from ALL_PROXY is normalized to `socks5://`."""
        # Start from a clean slate: drop every proxy var in both cases.
        for var in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                    "https_proxy", "http_proxy", "all_proxy"):
            monkeypatch.delenv(var, raising=False)
        monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
        assert resolve_proxy_url() == "socks5://127.0.0.1:1080/"
class TestRunAgentProxyDispatch:
"""Test that _run_agent() delegates to proxy when configured."""

View file

@ -184,8 +184,15 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p
async def stop(self):
return None
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
# get_running_pid returns 42 before we kill the old gateway, then None
# after remove_pid_file() clears the record (reflects real behavior).
_pid_state = {"alive": True}
def _mock_get_running_pid():
return 42 if _pid_state["alive"] else None
def _mock_remove_pid_file():
_pid_state["alive"] = False
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force)))
monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
@ -253,8 +260,13 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
async def stop(self):
return None
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
_pid_state = {"alive": True}
def _mock_get_running_pid():
return 42 if _pid_state["alive"] else None
def _mock_remove_pid_file():
_pid_state["alive"] = False
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker)
monkeypatch.setattr("gateway.status.terminate_pid", record_terminate)

View file

@ -356,6 +356,28 @@ class TestBuildSessionContextPrompt:
assert "**User:** Alice" in prompt
assert "Multi-user thread" not in prompt
def test_shared_non_thread_group_prompt_hides_single_user(self):
    """Shared non-thread group sessions should avoid pinning one user."""
    gateway_config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake")},
        group_sessions_per_user=False,
    )
    group_source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1002285219667",
        chat_name="Test Group",
        chat_type="group",
        user_name="Alice",
    )
    session_ctx = build_session_context(group_source, gateway_config)
    prompt = build_session_context_prompt(session_ctx)
    # Multi-user framing is shown; no single sender is pinned.
    assert "Multi-user session" in prompt
    assert "[sender name]" in prompt
    assert "**User:** Alice" not in prompt
def test_dm_thread_shows_user_not_multi(self):
"""DM threads are single-user and should show User, not multi-user note."""
config = GatewayConfig(

View file

@ -0,0 +1,70 @@
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent
from gateway.run import GatewayRunner
from gateway.session import SessionSource
def _make_runner(config: GatewayConfig) -> GatewayRunner:
    """Build a bare GatewayRunner (bypasses __init__ and any I/O it does)."""
    runner = object.__new__(GatewayRunner)
    for attr, value in {
        "config": config,
        "adapters": {},
        "_model": "openai/gpt-4.1-mini",
        "_base_url": None,
    }.items():
        setattr(runner, attr, value)
    return runner
@pytest.mark.asyncio
async def test_preprocess_prefixes_sender_for_shared_non_thread_group_session():
    """Shared (non-per-user) group sessions get the sender name prefixed."""
    config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake")},
        group_sessions_per_user=False,
    )
    runner = _make_runner(config)
    source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1002285219667",
        chat_name="Test Group",
        chat_type="group",
        user_name="Alice",
    )
    prepared = await runner._prepare_inbound_message_text(
        event=MessageEvent(text="hello", source=source),
        source=source,
        history=[],
    )
    assert prepared == "[Alice] hello"
@pytest.mark.asyncio
async def test_preprocess_keeps_plain_text_for_default_group_sessions():
    """Default (per-user) group sessions keep the message text untouched."""
    config = GatewayConfig(
        platforms={Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake")},
    )
    runner = _make_runner(config)
    source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1002285219667",
        chat_name="Test Group",
        chat_type="group",
        user_name="Alice",
    )
    prepared = await runner._prepare_inbound_message_text(
        event=MessageEvent(text="hello", source=source),
        source=source,
        history=[],
    )
    assert prepared == "hello"

View file

@ -306,7 +306,13 @@ class TestSignalSessionSource:
class TestSignalPhoneRedaction:
@pytest.fixture(autouse=True)
def _ensure_redaction_enabled(self, monkeypatch):
    """Force redaction on for every test in this class."""
    # agent.redact snapshots _REDACT_ENABLED at import time from the
    # HERMES_REDACT_SECRETS env var. monkeypatch.delenv is too late —
    # the module was already imported during test collection with
    # whatever value was in the env then. Force the flag directly.
    # See skill: xdist-cross-test-pollution Pattern 5.
    monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
    monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
def test_us_number(self):
from agent.redact import redact_sensitive_text

View file

@ -19,6 +19,30 @@ class TestGatewayPidState:
assert isinstance(payload["argv"], list)
assert payload["argv"]
def test_write_pid_file_is_atomic_against_concurrent_writers(self, tmp_path, monkeypatch):
    """Regression: two concurrent --replace invocations must not both win.

    Without O_CREAT|O_EXCL, two processes racing through start_gateway()'s
    termination-wait would both write to gateway.pid, silently overwriting
    each other and leaving multiple gateway instances alive (#11718).
    """
    import pytest

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    pid_file = tmp_path / "gateway.pid"

    # First writer wins and the record lands on disk.
    status.write_pid_file()
    assert pid_file.exists()

    # A racing second writer (one that missed the earlier guards) must hit
    # the exclusive-create guard rather than clobber the record.
    with pytest.raises(FileExistsError):
        status.write_pid_file()

    # The surviving record still belongs to the first writer.
    payload = json.loads(pid_file.read_text())
    assert payload["pid"] == os.getpid()
def test_get_running_pid_rejects_live_non_gateway_pid(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
pid_path = tmp_path / "gateway.pid"

View file

@ -71,7 +71,17 @@ def test_group_messages_can_require_direct_trigger_via_config():
assert adapter._should_process_message(_group_message("hello everyone")) is False
assert adapter._should_process_message(_group_message("hi @hermes_bot", entities=[_mention_entity("hi @hermes_bot")])) is True
assert adapter._should_process_message(_group_message("replying", reply_to_bot=True)) is True
assert adapter._should_process_message(_group_message("/status"), is_command=True) is True
# Commands must also respect require_mention when it is enabled
assert adapter._should_process_message(_group_message("/status"), is_command=True) is False
# But commands with @mention still pass (Telegram emits a MENTION entity
# for /cmd@botname — the bot menu and python-telegram-bot's CommandHandler
# rely on this same mechanism)
assert adapter._should_process_message(
_group_message("/status@hermes_bot", entities=[_mention_entity("/status@hermes_bot")])
) is True
# And commands still pass unconditionally when require_mention is disabled
adapter_no_mention = _make_adapter(require_mention=False)
assert adapter_no_mention._should_process_message(_group_message("/status"), is_command=True) is True
def test_free_response_chats_bypass_mention_requirement():

View file

@ -0,0 +1,100 @@
"""Tests for GHSA-3vpc-7q5r-276h — Telegram webhook secret required.
Previously, when TELEGRAM_WEBHOOK_URL was set but TELEGRAM_WEBHOOK_SECRET
was not, python-telegram-bot received secret_token=None and the webhook
endpoint accepted any HTTP POST.
The fix refuses to start the adapter in webhook mode without the secret.
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
import pytest
_repo = str(Path(__file__).resolve().parents[2])
if _repo not in sys.path:
sys.path.insert(0, _repo)
class TestTelegramWebhookSecretRequired:
    """Direct source-level check of the webhook-secret guard.

    The guard is embedded in TelegramAdapter.connect() and is hard to
    isolate via mocks (it requires a full python-telegram-bot
    ApplicationBuilder chain). These tests exercise it via source
    inspection: verifying the check exists, raises RuntimeError with the
    advisory link, and only fires in webhook mode. End-to-end validation
    is covered by CI + manual deployment tests.
    """

    def _get_source(self) -> str:
        # Read the adapter module's source text for regex-based inspection;
        # this avoids constructing the real Telegram application stack.
        path = Path(_repo) / "gateway" / "platforms" / "telegram.py"
        return path.read_text(encoding="utf-8")

    def test_webhook_branch_checks_secret(self):
        """The webhook-mode branch of connect() must read
        TELEGRAM_WEBHOOK_SECRET and refuse when empty."""
        src = self._get_source()
        # The guard must appear after TELEGRAM_WEBHOOK_URL is set
        assert re.search(
            r'TELEGRAM_WEBHOOK_SECRET.*?\.strip\(\)\s*\n\s*if not webhook_secret:',
            src, re.DOTALL,
        ), (
            "TelegramAdapter.connect() must strip TELEGRAM_WEBHOOK_SECRET "
            "and raise when the secret is empty see GHSA-3vpc-7q5r-276h"
        )

    def test_guard_raises_runtime_error(self):
        """The guard raises RuntimeError (not a silent log) so operators
        see the failure at startup."""
        src = self._get_source()
        # Between the "if not webhook_secret:" line and the next blank
        # line block, we should see a RuntimeError being raised
        guard_match = re.search(
            r'if not webhook_secret:\s*\n\s*raise\s+RuntimeError\(',
            src,
        )
        assert guard_match, (
            "Missing webhook secret must raise RuntimeError silent "
            "fall-through was the original GHSA-3vpc-7q5r-276h bypass"
        )

    def test_guard_message_includes_advisory_link(self):
        """The RuntimeError message should reference the advisory so
        operators can read the full context."""
        src = self._get_source()
        # NOTE(review): this scans the whole module, not just the guard's
        # message string — any mention of the advisory id satisfies it.
        assert "GHSA-3vpc-7q5r-276h" in src, (
            "Guard error message must cite the advisory for operator context"
        )

    def test_guard_message_explains_remediation(self):
        """The error should tell the operator how to fix it."""
        src = self._get_source()
        # Should mention how to generate a secret
        assert "openssl rand" in src or "TELEGRAM_WEBHOOK_SECRET=" in src, (
            "Guard error message should show operators how to set "
            "TELEGRAM_WEBHOOK_SECRET"
        )

    def test_polling_branch_has_no_secret_guard(self):
        """Polling mode (else-branch) must NOT require the webhook secret —
        polling authenticates via the bot token, not a webhook secret."""
        src = self._get_source()
        # The guard should appear inside the `if webhook_url:` branch,
        # not the `else:` polling branch. Rough check: the raise is
        # followed (within ~60 lines) by an `else:` that starts the
        # polling branch, and there's no secret-check in that polling
        # branch.
        webhook_block = re.search(
            r'if webhook_url:\s*\n(.*?)\n else:\s*\n(.*?)\n',
            src, re.DOTALL,
        )
        # NOTE(review): group(2) is non-greedy and terminated by the first
        # newline, so polling_body captures at most one line — the negative
        # assertion below is close to vacuous. Consider capturing the full
        # else-branch; confirm against telegram.py before tightening.
        if webhook_block:
            webhook_body = webhook_block.group(1)
            polling_body = webhook_block.group(2)
            assert "TELEGRAM_WEBHOOK_SECRET" in webhook_body
            assert "TELEGRAM_WEBHOOK_SECRET" not in polling_body

View file

@ -175,3 +175,79 @@ class TestUsageCachedAgent:
result = await runner._handle_usage_command(event)
assert "Cost: included" in result
class TestUsageAccountSection:
    """Account-limits section appended to /usage output (PR #2486)."""

    @pytest.mark.asyncio
    async def test_usage_command_includes_account_section(self, monkeypatch):
        # Live agent carries the Codex billing identity on itself.
        agent = _make_mock_agent(provider="openai-codex")
        agent.base_url = "https://chatgpt.com/backend-api/codex"
        agent.api_key = "unused"
        runner = _make_runner(SK, cached_agent=agent)
        event = MagicMock()
        # Stub the account-usage fetch and the renderer so no network I/O
        # happens; the renderer's fixed lines are what we assert on below.
        monkeypatch.setattr(
            "gateway.run.fetch_account_usage",
            lambda provider, base_url=None, api_key=None: object(),
        )
        monkeypatch.setattr(
            "gateway.run.render_account_usage_lines",
            lambda snapshot, markdown=False: [
                "📈 **Account limits**",
                "Provider: openai-codex (Pro)",
                "Session: 85% remaining (15% used)",
            ],
        )
        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
                patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
            mock_cost.return_value = MagicMock(amount_usd=None, status="included")
            result = await runner._handle_usage_command(event)
        # Both the token-usage block and the rendered account section appear.
        assert "📊 **Session Token Usage**" in result
        assert "📈 **Account limits**" in result
        assert "Provider: openai-codex (Pro)" in result

    @pytest.mark.asyncio
    async def test_usage_command_uses_persisted_provider_when_agent_not_running(self, monkeypatch):
        # No cached agent — the billing identity must come from the
        # persisted session DB record instead.
        runner = _make_runner(SK)
        runner._session_db = MagicMock()
        runner._session_db.get_session.return_value = {
            "billing_provider": "openai-codex",
            "billing_base_url": "https://chatgpt.com/backend-api/codex",
        }
        session_entry = MagicMock()
        session_entry.session_id = "sess-1"
        runner.session_store.get_or_create_session.return_value = session_entry
        runner.session_store.load_transcript.return_value = [
            {"role": "user", "content": "earlier"},
        ]
        calls = {}

        # Capture what the runner forwards to asyncio.to_thread so we can
        # assert fetch_account_usage received the persisted identity.
        async def _fake_to_thread(fn, *args, **kwargs):
            calls["args"] = args
            calls["kwargs"] = kwargs
            return fn(*args, **kwargs)

        monkeypatch.setattr("gateway.run.asyncio.to_thread", _fake_to_thread)
        monkeypatch.setattr(
            "gateway.run.fetch_account_usage",
            lambda provider, base_url=None, api_key=None: object(),
        )
        monkeypatch.setattr(
            "gateway.run.render_account_usage_lines",
            lambda snapshot, markdown=False: [
                "📈 **Account limits**",
                "Provider: openai-codex (Pro)",
            ],
        )
        event = MagicMock()
        result = await runner._handle_usage_command(event)
        # Persisted provider + base_url flowed through to the fetch call.
        assert calls["args"] == ("openai-codex",)
        assert calls["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
        assert "📊 **Session Info**" in result
        assert "📈 **Account limits**" in result

View file

@ -921,17 +921,13 @@ class TestKimiMoonshotModelListIsolation:
leaked = set(moonshot_models) & coding_plan_only
assert not leaked, f"Moonshot list contains Coding Plan-only models: {leaked}"
def test_moonshot_list_contains_shared_models(self):
def test_moonshot_list_non_empty(self):
from hermes_cli.main import _PROVIDER_MODELS
moonshot_models = _PROVIDER_MODELS["moonshot"]
assert "kimi-k2.5" in moonshot_models
assert "kimi-k2-thinking" in moonshot_models
assert len(_PROVIDER_MODELS["moonshot"]) >= 1
def test_coding_plan_list_contains_plan_specific_models(self):
def test_coding_plan_list_non_empty(self):
from hermes_cli.main import _PROVIDER_MODELS
coding_models = _PROVIDER_MODELS["kimi-coding"]
assert "kimi-for-coding" in coding_models
assert "kimi-k2-thinking-turbo" in coding_models
assert len(_PROVIDER_MODELS["kimi-coding"]) >= 1
# =============================================================================
@ -944,14 +940,12 @@ class TestHuggingFaceModels:
def test_main_provider_models_has_huggingface(self):
from hermes_cli.main import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 6, "Expected at least 6 curated HF models"
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
def test_models_py_has_huggingface(self):
from hermes_cli.models import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 6
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
def test_model_lists_match(self):
"""Model lists in main.py and models.py should be identical."""

View file

@ -115,12 +115,12 @@ class TestArceeCredentials:
class TestArceeModelCatalog:
def test_static_model_list(self):
"""Arcee has a static _PROVIDER_MODELS catalog entry. Specific model
names change with releases and don't belong in tests.
"""
from hermes_cli.models import _PROVIDER_MODELS
assert "arcee" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["arcee"]
assert "trinity-large-thinking" in models
assert "trinity-large-preview" in models
assert "trinity-mini" in models
assert len(_PROVIDER_MODELS["arcee"]) >= 1
def test_canonical_provider_entry(self):
from hermes_cli.models import CANONICAL_PROVIDERS

View file

@ -1011,3 +1011,466 @@ def test_seed_from_singletons_respects_codex_suppression(tmp_path, monkeypatch):
# Verify the auth store was NOT modified (no auto-import happened)
after = json.loads((hermes_home / "auth.json").read_text())
assert "openai-codex" not in after.get("providers", {})
def test_auth_remove_env_seeded_suppresses_shell_exported_var(tmp_path, monkeypatch, capsys):
    """`hermes auth remove xai 1` must stick even when the env var is exported
    by the shell (not written into ~/.hermes/.env). Before the PR for #13371
    the removal silently restored on next load_pool() because _seed_from_env()
    re-read os.environ. Now env:<VAR> is suppressed in auth.json.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    # Simulate shell export (NOT written to .env)
    monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
    (hermes_home / ".env").write_text("")
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "credential_pool": {
                "xai": [{
                    "id": "env-1",
                    "label": "XAI_API_KEY",
                    "auth_type": "api_key",
                    "priority": 0,
                    "source": "env:XAI_API_KEY",
                    "access_token": "sk-xai-shell-export",
                    "base_url": "https://api.x.ai/v1",
                }]
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="xai", target="1"))
    # Suppression marker written
    after = json.loads((hermes_home / "auth.json").read_text())
    assert "env:XAI_API_KEY" in after.get("suppressed_sources", {}).get("xai", [])
    # Diagnostic printed pointing at the shell
    out = capsys.readouterr().out
    assert "still set in your shell environment" in out
    assert "Cleared XAI_API_KEY from .env" not in out  # wasn't in .env
    # Fresh simulation: shell re-exports, reload pool
    monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
    from agent.credential_pool import load_pool

    pool = load_pool("xai")
    assert not pool.has_credentials(), "pool must stay empty — env:XAI_API_KEY suppressed"
def test_auth_remove_env_seeded_dotenv_only_no_shell_hint(tmp_path, monkeypatch, capsys):
    """When the env var lives only in ~/.hermes/.env (not the shell), the
    shell-hint should NOT be printed — avoid scaring the user about a
    non-existent shell export.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    # Key ONLY in .env, shell must not have it
    monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False)
    (hermes_home / ".env").write_text("DEEPSEEK_API_KEY=sk-ds-only\n")
    # Mimic load_env() populating os.environ
    monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-ds-only")
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "credential_pool": {
                "deepseek": [{
                    "id": "env-1",
                    "label": "DEEPSEEK_API_KEY",
                    "auth_type": "api_key",
                    "priority": 0,
                    "source": "env:DEEPSEEK_API_KEY",
                    "access_token": "sk-ds-only",
                }]
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="deepseek", target="1"))
    out = capsys.readouterr().out
    # .env entry is cleared, and no shell warning for a key that was
    # never exported by the shell.
    assert "Cleared DEEPSEEK_API_KEY from .env" in out
    assert "still set in your shell environment" not in out
    assert (hermes_home / ".env").read_text().strip() == ""
def test_auth_add_clears_env_suppression_for_provider(tmp_path, monkeypatch):
    """Re-adding a credential via `hermes auth add <provider>` clears any
    env:<VAR> suppression marker — a strong signal the user wants auth back.
    Matches the Codex device_code re-link behaviour.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    # Make sure the env var itself can't interfere with this test.
    monkeypatch.delenv("XAI_API_KEY", raising=False)
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "providers": {},
            "suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_add_command

    # Marker present before the add, gone after it.
    assert is_source_suppressed("xai", "env:XAI_API_KEY") is True
    auth_add_command(SimpleNamespace(
        provider="xai", auth_type="api_key",
        api_key="sk-xai-manual", label="manual",
    ))
    assert is_source_suppressed("xai", "env:XAI_API_KEY") is False
def test_seed_from_env_respects_env_suppression(tmp_path, monkeypatch):
    """_seed_from_env() must skip env:<VAR> sources that the user suppressed
    via `hermes auth remove` — the gate that prevents shell-exported keys
    from resurrecting removed credentials.
    """
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    # The shell still exports the key, but the source is marked suppressed.
    monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
    }
    (home / "auth.json").write_text(json.dumps(store))

    from agent.credential_pool import _seed_from_env

    seeded = []
    changed, active = _seed_from_env("xai", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
def test_seed_from_env_respects_openrouter_suppression(tmp_path, monkeypatch):
    """OpenRouter is the special-case branch in _seed_from_env; verify the
    suppression gate holds there too."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-shell-export")
    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"openrouter": ["env:OPENROUTER_API_KEY"]},
    }
    (home / "auth.json").write_text(json.dumps(store))

    from agent.credential_pool import _seed_from_env

    seeded = []
    changed, active = _seed_from_env("openrouter", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
# =============================================================================
# Unified credential-source stickiness — every source Hermes reads from has a
# registered RemovalStep in agent.credential_sources, and every seeding path
# gates on is_source_suppressed. Below: one test per source proving remove
# sticks across a fresh load_pool() call.
# =============================================================================
def test_seed_from_singletons_respects_nous_suppression(tmp_path, monkeypatch):
    """nous device_code must not re-seed from auth.json while suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    # A valid-looking singleton is present, but its source is suppressed.
    nous_singleton = {"access_token": "tok", "refresh_token": "r", "expires_at": 9999999999}
    store = {
        "version": 1,
        "providers": {"nous": nous_singleton},
        "suppressed_sources": {"nous": ["device_code"]},
    }
    (home / "auth.json").write_text(json.dumps(store))

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    changed, active = _seed_from_singletons("nous", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
def test_seed_from_singletons_respects_copilot_suppression(tmp_path, monkeypatch):
    """copilot's gh_cli source must not re-seed while suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"copilot": ["gh_cli"]},
    }
    (home / "auth.json").write_text(json.dumps(store))
    # Pretend `gh auth token` resolves to a live token.
    import hermes_cli.copilot_auth as copilot_auth
    monkeypatch.setattr(copilot_auth, "resolve_copilot_token", lambda: ("ghp_fake", "gh auth token"))

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    changed, active = _seed_from_singletons("copilot", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
def test_seed_from_singletons_respects_qwen_suppression(tmp_path, monkeypatch):
    """qwen-oauth's qwen-cli source must not re-seed from
    ~/.qwen/oauth_creds.json while suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"qwen-oauth": ["qwen-cli"]},
    }
    (home / "auth.json").write_text(json.dumps(store))
    # Pretend the qwen CLI credentials resolve to a live token.
    import hermes_cli.auth as auth_mod
    monkeypatch.setattr(auth_mod, "resolve_qwen_runtime_credentials", lambda **kw: {
        "api_key": "tok", "source": "qwen-cli", "base_url": "https://q",
    })

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    changed, active = _seed_from_singletons("qwen-oauth", seeded)
    assert changed is False
    assert seeded == []
    assert active == set()
def test_seed_from_singletons_respects_hermes_pkce_suppression(tmp_path, monkeypatch):
    """anthropic hermes_pkce must not re-seed from ~/.hermes/.anthropic_oauth.json when suppressed."""
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    import yaml
    # Config selects anthropic so the anthropic seeding path is exercised.
    (hermes_home / "config.yaml").write_text(yaml.dump({"model": {"provider": "anthropic", "model": "claude"}}))
    (hermes_home / "auth.json").write_text(json.dumps({
        "version": 1,
        "providers": {},
        "suppressed_sources": {"anthropic": ["hermes_pkce"]},
    }))
    # Stub the readers so only hermes_pkce is "available"; claude_code returns None
    import agent.anthropic_adapter as aa
    monkeypatch.setattr(aa, "read_hermes_oauth_credentials", lambda: {
        "accessToken": "tok", "refreshToken": "r", "expiresAt": 9999999999000,
    })
    monkeypatch.setattr(aa, "read_claude_code_credentials", lambda: None)
    from agent.credential_pool import _seed_from_singletons

    entries = []
    changed, active = _seed_from_singletons("anthropic", entries)
    # hermes_pkce suppressed, claude_code returns None → nothing should be seeded
    assert entries == []
    assert "hermes_pkce" not in active
def test_seed_custom_pool_respects_config_suppression(tmp_path, monkeypatch):
    """Custom provider config:<name> source must not re-seed when suppressed."""
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    import yaml
    (hermes_home / "config.yaml").write_text(yaml.dump({
        "model": {},
        "custom_providers": [
            {"name": "my", "base_url": "https://c.example.com", "api_key": "sk-custom"},
        ],
    }))
    from agent.credential_pool import _seed_custom_pool, get_custom_provider_pool_key

    # The pool key is derived from the base URL, not the provider name.
    pool_key = get_custom_provider_pool_key("https://c.example.com")
    (hermes_home / "auth.json").write_text(json.dumps({
        "version": 1,
        "providers": {},
        "suppressed_sources": {pool_key: ["config:my"]},
    }))
    entries = []
    changed, active = _seed_custom_pool(pool_key, entries)
    assert changed is False
    assert entries == []
    assert "config:my" not in active
def test_credential_sources_registry_has_expected_steps():
    """Sanity check — the registry contains the expected RemovalSteps.

    Guards against accidentally dropping a step during future refactors.
    If you add a new credential source, add it to the expected set below.
    """
    from agent.credential_sources import _REGISTRY

    expected = {
        "gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
        "Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
        "~/.claude/.credentials.json",
        "~/.hermes/.anthropic_oauth.json",
        "auth.json providers.nous",
        "auth.json providers.openai-codex + ~/.codex/auth.json",
        "~/.qwen/oauth_creds.json",
        "Custom provider config.yaml api_key field",
    }
    descriptions = {step.description for step in _REGISTRY}
    assert descriptions == expected, f"Registry mismatch. Got: {descriptions}"
def test_credential_sources_find_step_returns_none_for_manual():
    """Manual entries have nothing external to clean up — no step registered."""
    from agent.credential_sources import find_removal_step

    for provider in ("openrouter", "xai"):
        assert find_removal_step(provider, "manual") is None
def test_credential_sources_find_step_copilot_before_generic_env(tmp_path, monkeypatch):
    """copilot env:GH_TOKEN must dispatch to the copilot-specific step, not
    the generic env-var step: the copilot step handles the duplicate-source
    problem (the same token seeded as both gh_cli and env:<VAR>), whereas
    the generic env step would only suppress one of the variants.
    """
    from agent.credential_sources import find_removal_step

    copilot_step = find_removal_step("copilot", "env:GH_TOKEN")
    assert copilot_step is not None
    desc = copilot_step.description.lower()
    assert "copilot" in desc or "gh" in desc
    # The generic step still matches every other provider's env var.
    generic_step = find_removal_step("xai", "env:XAI_API_KEY")
    assert generic_step is not None
    assert "env-seeded" in generic_step.description.lower()
def test_auth_remove_copilot_suppresses_all_variants(tmp_path, monkeypatch):
    """Removing any copilot source must suppress gh_cli + all env:* variants
    so the duplicate-seed paths don't resurrect the credential.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "credential_pool": {
                "copilot": [{
                    "id": "c1",
                    "label": "gh auth token",
                    "auth_type": "api_key",
                    "priority": 0,
                    "source": "gh_cli",
                    "access_token": "ghp_fake",
                }]
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="copilot", target="1"))
    # A single removal fans out to every alias the copilot seeder reads.
    assert is_source_suppressed("copilot", "gh_cli")
    assert is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN")
    assert is_source_suppressed("copilot", "env:GH_TOKEN")
    assert is_source_suppressed("copilot", "env:GITHUB_TOKEN")
def test_auth_add_clears_all_suppressions_including_non_env(tmp_path, monkeypatch):
    """Re-adding a credential via `hermes auth add <provider>` clears ALL
    suppression markers for the provider, not just env:*. This matches
    the single "re-engage" semantic — the user wants auth back, period.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "providers": {},
            "suppressed_sources": {
                "copilot": ["gh_cli", "env:GH_TOKEN", "env:COPILOT_GITHUB_TOKEN"],
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_add_command

    auth_add_command(SimpleNamespace(
        provider="copilot", auth_type="api_key",
        api_key="ghp-manual", label="m",
    ))
    # Every marker — env and non-env alike — must be gone after the add.
    assert not is_source_suppressed("copilot", "gh_cli")
    assert not is_source_suppressed("copilot", "env:GH_TOKEN")
    assert not is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN")
def test_auth_remove_codex_manual_device_code_suppresses_canonical(tmp_path, monkeypatch):
    """Removing a manual:device_code entry (from `hermes auth add openai-codex`)
    must suppress the canonical ``device_code`` key, not ``manual:device_code``.
    The re-seed gate in _seed_from_singletons checks ``device_code``.
    """
    hermes_home = tmp_path / "hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    _write_auth_store(
        tmp_path,
        {
            "version": 1,
            "providers": {"openai-codex": {"tokens": {"access_token": "t", "refresh_token": "r"}}},
            "credential_pool": {
                "openai-codex": [{
                    "id": "cdx",
                    "label": "manual-codex",
                    "auth_type": "oauth",
                    "priority": 0,
                    "source": "manual:device_code",
                    "access_token": "t",
                }]
            },
        },
    )
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="openai-codex", target="1"))
    # The suppression key must be the canonical form the seeder checks.
    assert is_source_suppressed("openai-codex", "device_code")

View file

@ -459,7 +459,8 @@ class TestCustomProviderCompatibility:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 21
from hermes_cli.config import DEFAULT_CONFIG
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["providers"]["openai-direct"] == {
"api": "https://api.openai.com/v1",
"api_key": "test-key",
@ -501,7 +502,8 @@ class TestCustomProviderCompatibility:
assert compatible[0]["provider_key"] == "openai-direct"
assert compatible[0]["api_mode"] == "codex_responses"
def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path):
def test_compatible_custom_providers_prefers_base_url_then_url_then_api(self, tmp_path):
"""URL field precedence is base_url > url > api (PR #9332)."""
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump(
@ -526,7 +528,7 @@ class TestCustomProviderCompatibility:
assert compatible == [
{
"name": "My Provider",
"base_url": "https://api.example.com/v1",
"base_url": "https://base.example.com/v1",
"provider_key": "my-provider",
}
]
@ -606,7 +608,8 @@ class TestInterimAssistantMessageConfig:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 21
from hermes_cli.config import DEFAULT_CONFIG
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["display"]["tool_progress"] == "off"
assert raw["display"]["interim_assistant_messages"] is True
@ -626,7 +629,8 @@ class TestDiscordChannelPromptsConfig:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 21
from hermes_cli.config import DEFAULT_CONFIG
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["discord"]["auto_thread"] is True
assert raw["discord"]["channel_prompts"] == {}

View file

@ -125,18 +125,12 @@ class TestGeminiCredentials:
# ── Model Catalog ──
class TestGeminiModelCatalog:
def test_provider_models_exist(self):
def test_provider_entry_exists(self):
"""Gemini provider has a model catalog entry. Specific model names
are data that changes with Google releases and don't belong in tests.
"""
assert "gemini" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["gemini"]
assert "gemini-2.5-pro" in models
assert "gemini-2.5-flash" in models
assert "gemma-4-31b-it" not in models
def test_provider_models_has_3x(self):
models = _PROVIDER_MODELS["gemini"]
assert "gemini-3.1-pro-preview" in models
assert "gemini-3-flash-preview" in models
assert "gemini-3.1-flash-lite-preview" in models
assert len(_PROVIDER_MODELS["gemini"]) >= 1
def test_provider_label(self):
assert "gemini" in _PROVIDER_LABELS

View file

@ -457,29 +457,62 @@ class TestValidateApiNotFound:
assert "not found" in result["message"]
# -- validate — API unreachable — reject with guidance ----------------
# -- validate — API unreachable — soft-accept via catalog or warning --------
class TestValidateApiFallback:
def test_any_model_rejected_when_api_down(self):
result = _validate("anthropic/claude-opus-4.6", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
"""When /models is unreachable, the validator must accept the model (with
a warning) rather than reject it outright otherwise provider switches
fail in the gateway for any provider whose /models endpoint is down or
doesn't exist (e.g. opencode-go returns 404 HTML).
def test_unknown_model_also_rejected_when_api_down(self):
result = _validate("anthropic/claude-next-gen", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
assert "could not reach" in result["message"].lower()
Two paths:
1. Provider has a curated catalog (``_PROVIDER_MODELS`` / live fetch):
validate against it (recognized=True for known models,
recognized=False with 'Note:' for unknown).
2. Provider has no catalog: accept with a generic 'Note:' warning.
def test_zai_model_rejected_when_api_down(self):
In both cases ``accepted`` and ``persist`` must be True so the gateway can
write the ``_session_model_overrides`` entry.
"""
def test_known_model_accepted_via_catalog_when_api_down(self):
# Force the openrouter catalog lookup to return a deterministic list.
with patch(
"hermes_cli.models.provider_model_ids",
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
):
result = _validate("anthropic/claude-opus-4.6", api_models=None)
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is True
def test_unknown_model_accepted_with_note_when_api_down(self):
with patch(
"hermes_cli.models.provider_model_ids",
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
):
result = _validate("anthropic/claude-next-gen", api_models=None)
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is False
# Message flags it as unverified against the catalog.
assert "not found" in result["message"].lower() or "note" in result["message"].lower()
def test_zai_known_model_accepted_via_catalog_when_api_down(self):
# glm-5 is in the zai curated catalog (_PROVIDER_MODELS["zai"]).
result = _validate("glm-5", provider="zai", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is True
def test_unknown_provider_rejected_when_api_down(self):
result = _validate("some-model", provider="totally-unknown", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
def test_unknown_provider_soft_accepted_when_api_down(self):
# No catalog for unknown providers — soft-accept with a Note.
with patch("hermes_cli.models.provider_model_ids", return_value=[]):
result = _validate("some-model", provider="totally-unknown", api_models=None)
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is False
assert "note" in result["message"].lower()
def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self):
with patch(

View file

@ -88,6 +88,131 @@ class TestFetchOpenRouterModels:
assert models == OPENROUTER_MODELS
    def test_filters_out_models_without_tool_support(self, monkeypatch):
        """Models whose supported_parameters omits 'tools' must not appear in
        the picker. hermes-agent is tool-calling-first — surfacing a non-tool
        model leads to immediate runtime failures when the user selects it.
        Ported from Kilo-Org/kilocode#9068.
        """
        class _Resp:
            # Minimal context-manager stand-in for the urlopen response.
            def __enter__(self):
                return self
            def __exit__(self, exc_type, exc, tb):
                return False
            def read(self):
                # opus-4.6 advertises tools → kept
                # nano-image has explicit supported_parameters that OMITS tools → dropped
                # qwen3.6-plus advertises tools → kept
                return (
                    b'{"data":['
                    b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"},'
                    b'"supported_parameters":["temperature","tools","tool_choice"]},'
                    b'{"id":"google/gemini-3-pro-image-preview","pricing":{"prompt":"0.00001","completion":"0.00003"},'
                    b'"supported_parameters":["temperature","response_format"]},'
                    b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"},'
                    b'"supported_parameters":["tools","temperature"]}'
                    b']}'
                )
        # Include the image-only id in the curated list so it has a chance to be surfaced.
        monkeypatch.setattr(
            _models_mod,
            "OPENROUTER_MODELS",
            [
                ("anthropic/claude-opus-4.6", ""),
                ("google/gemini-3-pro-image-preview", ""),
                ("qwen/qwen3.6-plus", ""),
            ],
        )
        # Drop the cached catalog so the stubbed HTTP response is actually parsed.
        monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)
        with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()):
            models = fetch_openrouter_models(force_refresh=True)
        ids = [mid for mid, _ in models]
        assert "anthropic/claude-opus-4.6" in ids
        assert "qwen/qwen3.6-plus" in ids
        # Image-only model advertised supported_parameters WITHOUT tools → must be dropped.
        assert "google/gemini-3-pro-image-preview" not in ids
    def test_permissive_when_supported_parameters_missing(self, monkeypatch):
        """Models missing the supported_parameters field keep appearing in the picker.

        Some OpenRouter-compatible gateways (Nous Portal, private mirrors, older
        catalog snapshots) don't populate supported_parameters. Treating missing
        as 'unknown → allow' prevents the picker from silently emptying on
        those gateways.
        """
        class _Resp:
            # Minimal context-manager stand-in for the urlopen response.
            def __enter__(self):
                return self
            def __exit__(self, exc_type, exc, tb):
                return False
            def read(self):
                # No supported_parameters field at all on either entry.
                return (
                    b'{"data":['
                    b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"}},'
                    b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"}}'
                    b']}'
                )
        # Drop the cached catalog so the stubbed response is actually parsed.
        monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)
        with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()):
            models = fetch_openrouter_models(force_refresh=True)
        ids = [mid for mid, _ in models]
        # Both entries survive — absence of the field must not filter them out.
        assert "anthropic/claude-opus-4.6" in ids
        assert "qwen/qwen3.6-plus" in ids
class TestOpenRouterToolSupportHelper:
    """Unit tests for _openrouter_model_supports_tools (Kilo port #9068).

    Policy under test: keep a model only when supported_parameters
    mentions "tools"; treat a missing, None, malformed, or non-dict
    entry as "unknown -> allow" so non-OpenRouter gateways keep working.
    """

    @staticmethod
    def _supports(entry):
        # Imported lazily, matching the original per-test imports, so the
        # module can be collected outside the full repo.
        from hermes_cli.models import _openrouter_model_supports_tools
        return _openrouter_model_supports_tools(entry)

    def test_tools_in_supported_parameters(self):
        assert self._supports(
            {"id": "x", "supported_parameters": ["temperature", "tools"]}
        ) is True

    def test_tools_missing_from_supported_parameters(self):
        assert self._supports(
            {"id": "x", "supported_parameters": ["temperature", "response_format"]}
        ) is False

    def test_supported_parameters_absent_is_permissive(self):
        """Missing field → allow (so older / non-OR gateways still work)."""
        assert self._supports({"id": "x"}) is True

    def test_supported_parameters_none_is_permissive(self):
        assert self._supports({"id": "x", "supported_parameters": None}) is True

    def test_supported_parameters_malformed_is_permissive(self):
        """Malformed (non-list) value → allow rather than silently drop."""
        assert self._supports(
            {"id": "x", "supported_parameters": "tools,temperature"}
        ) is True

    def test_non_dict_item_is_permissive(self):
        assert self._supports(None) is True
        assert self._supports("anthropic/claude-opus-4.6") is True

    def test_empty_supported_parameters_list_drops_model(self):
        """Explicit empty list → no tools → drop."""
        assert self._supports({"id": "x", "supported_parameters": []}) is False
class TestFindOpenrouterSlug:
def test_exact_match(self):

View file

@ -15,7 +15,7 @@ def test_opencode_go_appears_when_api_key_set():
opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None)
assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set"
assert opencode_go["models"] == ["kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
assert opencode_go["models"] == ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
# opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when
# models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when
# the API is unavailable, e.g. in CI).

View file

@ -0,0 +1,133 @@
"""Tests for the static-catalog fallback in validate_requested_model.
OpenCode Go and OpenCode Zen publish an OpenAI-compatible API at paths that do
NOT expose ``/models`` (the path returns the marketing site's HTML 404). This
caused ``validate_requested_model`` to return ``accepted=False`` for every
model on those providers, which in turn made ``switch_model()`` fail and the
gateway's ``/model <name> --provider opencode-go`` command never write to
``_session_model_overrides``.
These tests cover the catalog-fallback path: when ``fetch_api_models`` returns
``None``, the validator must consult ``provider_model_ids()`` for the provider
(populated from ``_PROVIDER_MODELS``) rather than rejecting outright.
"""
from unittest.mock import patch
from hermes_cli.models import validate_requested_model
# Canned probe_api_models result for a provider whose /models endpoint is
# unreachable: no model list came back, no fallback URL was taken, and no
# alternative base URL is suggested. Shared by every test via _patched.
_UNREACHABLE_PROBE = {
    "models": None,
    "probed_url": "https://opencode.ai/zen/go/v1/models",
    "resolved_base_url": "https://opencode.ai/zen/go/v1",
    "suggested_base_url": None,
    "used_fallback": False,
}
def _patched(func):
"""Decorator: force fetch_api_models / probe_api_models to simulate an
unreachable /models endpoint, proving the catalog path is used."""
def wrapper(*args, **kwargs):
with patch("hermes_cli.models.fetch_api_models", return_value=None), \
patch("hermes_cli.models.probe_api_models", return_value=_UNREACHABLE_PROBE):
return func(*args, **kwargs)
wrapper.__name__ = func.__name__
return wrapper
# ---------------------------------------------------------------------------
# opencode-go: curated catalog in _PROVIDER_MODELS
# ---------------------------------------------------------------------------
@_patched
def test_opencode_go_known_model_accepted():
    """A model present in the opencode-go curated catalog must be accepted
    even when /models is unreachable."""
    outcome = validate_requested_model("kimi-k2.6", "opencode-go")
    for flag in ("accepted", "persist", "recognized"):
        assert outcome[flag] is True
    assert outcome["message"] is None
@_patched
def test_opencode_go_known_model_case_insensitive():
    """Catalog lookup is case-insensitive."""
    outcome = validate_requested_model("KIMI-K2.6", "opencode-go")
    assert outcome["accepted"] is True
    assert outcome["recognized"] is True
@_patched
def test_opencode_go_typo_auto_corrected():
    """A close typo (>= 0.9 similarity) is auto-corrected to the catalog
    entry."""
    # 'kimi-k2.55' vs 'kimi-k2.5' similarity ≈ 0.95 — inside the 0.9 cutoff.
    outcome = validate_requested_model("kimi-k2.55", "opencode-go")
    assert outcome["accepted"] is True
    assert outcome["recognized"] is True
    assert outcome.get("corrected_model") == "kimi-k2.5"
@_patched
def test_opencode_go_unknown_model_accepted_with_suggestion():
    """An unknown model with a medium-similarity match (>= 0.5 but < 0.9)
    is accepted with recognized=False plus a 'similar models' hint. Key
    invariant: the gateway MUST be able to persist this override, so both
    accepted and persist are True."""
    # 'kimi-k3-preview' vs 'kimi-k2.6' — similar enough to suggest,
    # not similar enough to auto-correct.
    outcome = validate_requested_model("kimi-k3-preview", "opencode-go")
    assert outcome["accepted"] is True
    assert outcome["persist"] is True
    assert outcome["recognized"] is False
    hint = outcome["message"]
    assert "kimi-k3-preview" in hint
    assert "curated catalog" in hint
@_patched
def test_opencode_go_totally_unknown_model_still_accepted():
    """A model with zero similarity to the catalog is still accepted (no
    suggestion line) so the user can try a model that hasn't made it into the
    curated list yet."""
    result = validate_requested_model("some-brand-new-model", "opencode-go")
    assert result["accepted"] is True
    assert result["persist"] is True
    assert result["recognized"] is False
    # No suggestion text (no close matches)
    assert "Similar models" not in result["message"]
    # The original disjunction was redundant: "opencode go" in s implies
    # "opencode" in s, so the single membership check expresses the intent.
    assert "opencode" in result["message"].lower()
# ---------------------------------------------------------------------------
# opencode-zen: same pattern as opencode-go
# ---------------------------------------------------------------------------
@_patched
def test_opencode_zen_known_model_accepted():
    """opencode-zen also uses _PROVIDER_MODELS; kimi-k2 is in its catalog."""
    outcome = validate_requested_model("kimi-k2", "opencode-zen")
    for flag in ("accepted", "recognized"):
        assert outcome[flag] is True
# ---------------------------------------------------------------------------
# Unknown provider with no catalog: soft-accept (honors the comment's intent)
# ---------------------------------------------------------------------------
@_patched
def test_provider_without_catalog_accepts_with_warning():
    """A provider with no _PROVIDER_MODELS entry and an unreachable /models
    endpoint still gets the model accepted — with a 'Note:' warning rather
    than a rejection. Matches the in-code comment: 'Accept and persist, but
    warn so typos don't silently break things.'"""
    # Made-up provider name that resolves to no catalog at all.
    outcome = validate_requested_model("some-model", "provider-that-does-not-exist")
    for flag, expected in (("accepted", True), ("persist", True), ("recognized", False)):
        assert outcome[flag] is expected
    assert "Note:" in outcome["message"]

View file

@ -1412,3 +1412,90 @@ def test_named_custom_runtime_no_model_when_absent(monkeypatch):
resolved = rp.resolve_runtime_provider(requested="my-server")
assert "model" not in resolved
# ---------------------------------------------------------------------------
# GHSA-76xc-57q6-vm5m — Ollama URL substring leak
#
# Same bug class as the previously-fixed GHSA-xf8p-v2cg-h7h5 (OpenRouter).
# _resolve_openrouter_runtime's custom-endpoint branch selects OLLAMA_API_KEY
# when the base_url "looks like" ollama.com. Previous implementation used
# raw substring match; a custom base_url whose PATH or look-alike host
# merely contained "ollama.com" leaked OLLAMA_API_KEY to that endpoint.
# Fix: use base_url_host_matches (same helper as the OpenRouter sweep).
# ---------------------------------------------------------------------------
class TestOllamaUrlSubstringLeak:
    """Call-site regression tests for the fix in _resolve_openrouter_runtime."""

    def _make_cfg(self, base_url):
        # Minimal custom-provider config; the empty api_key forces the
        # resolver to pick a credential from the environment.
        return {"base_url": base_url, "api_key": "", "provider": "custom"}

    def test_ollama_key_not_leaked_to_path_injection(self, monkeypatch):
        """http://127.0.0.1:9000/ollama.com/v1 — attacker endpoint with
        ollama.com in PATH. Must resolve to OPENAI_API_KEY, not OLLAMA_API_KEY."""
        monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-secret")
        monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak")
        # Force the custom-endpoint resolution path with no pool fallbacks,
        # so the only credential choice left is the env-var selection under test.
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "http://127.0.0.1:9000/ollama.com/v1"
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
        monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
        resolved = rp.resolve_runtime_provider(requested="custom")
        assert "ol-SECRET" not in resolved["api_key"], (
            "OLLAMA_API_KEY must not be sent to an endpoint whose "
            "hostname is not ollama.com (GHSA-76xc-57q6-vm5m)"
        )
        assert resolved["api_key"] == "oa-secret"

    def test_ollama_key_not_leaked_to_lookalike_host(self, monkeypatch):
        """ollama.com.attacker.test — look-alike host. OLLAMA_API_KEY
        must not be sent."""
        monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
        monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak")
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "http://ollama.com.attacker.test:9000/v1"
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
        monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
        resolved = rp.resolve_runtime_provider(requested="custom")
        assert "ol-SECRET" not in resolved["api_key"]
        assert resolved["api_key"] == "oa-secret"

    def test_ollama_key_sent_to_genuine_ollama_com(self, monkeypatch):
        """https://ollama.com/v1 — legit Ollama Cloud. OLLAMA_API_KEY
        should be used."""
        monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
        monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key")
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "https://ollama.com/v1"
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
        monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
        resolved = rp.resolve_runtime_provider(requested="custom")
        assert resolved["api_key"] == "ol-legit-key"

    def test_ollama_key_sent_to_ollama_subdomain(self, monkeypatch):
        """https://api.ollama.com/v1 — legit subdomain."""
        monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
        monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key")
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
            "https://api.ollama.com/v1"
        ))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
        monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
        resolved = rp.resolve_runtime_provider(requested="custom")
        assert resolved["api_key"] == "ol-legit-key"

View file

@ -0,0 +1,148 @@
"""Tests for GHSA-ppp5-vxwm-4cf7 — Host-header validation.
DNS rebinding defence: a victim browser that has the dashboard open
could be tricked into fetching from an attacker-controlled hostname
that TTL-flips to 127.0.0.1. Same-origin / CORS checks won't help —
the browser now treats the attacker origin as same-origin. Validating
the Host header at the application layer rejects the attack.
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
# Make the repo root importable when pytest is launched from elsewhere
# (e.g. a CI working directory outside the checkout).
_repo = str(Path(__file__).resolve().parents[1])
if _repo not in sys.path:
    sys.path.insert(0, _repo)
class TestHostHeaderValidator:
    """Exercise the _is_accepted_host helper directly — cheaper and more
    thorough than spinning up the full FastAPI app."""

    def test_loopback_bind_accepts_loopback_names(self):
        from hermes_cli.web_server import _is_accepted_host
        loopback_binds = ("127.0.0.1", "localhost", "::1")
        loopback_headers = (
            "127.0.0.1", "127.0.0.1:9119",
            "localhost", "localhost:9119",
            "[::1]", "[::1]:9119",
        )
        for bound in loopback_binds:
            for host_header in loopback_headers:
                assert _is_accepted_host(host_header, bound), (
                    f"bound={bound} must accept host={host_header}"
                )

    def test_loopback_bind_rejects_attacker_hostnames(self):
        """The core rebinding defence: attacker-controlled hosts that
        TTL-flip to 127.0.0.1 must be rejected."""
        from hermes_cli.web_server import _is_accepted_host
        attacker_headers = (
            "evil.example",
            "evil.example:9119",
            "rebind.attacker.test:80",
            "localhost.attacker.test",  # subdomain trick
            "127.0.0.1.evil.test",  # lookalike IP prefix
            "",  # missing Host
        )
        for bound in ("127.0.0.1", "localhost"):
            for attacker in attacker_headers:
                assert not _is_accepted_host(attacker, bound), (
                    f"bound={bound} must reject attacker host={attacker!r}"
                )

    def test_zero_zero_bind_accepts_anything(self):
        """0.0.0.0 means the operator explicitly opted into all interfaces
        (requires --insecure). No Host-layer defence is possible — rely on
        operator network controls."""
        from hermes_cli.web_server import _is_accepted_host
        for host in ("10.0.0.5", "evil.example", "my-server.corp.net"):
            for candidate in (host, host + ":9119"):
                assert _is_accepted_host(candidate, "0.0.0.0")

    def test_explicit_non_loopback_bind_requires_exact_match(self):
        """A bind to a specific non-loopback hostname means the Host header
        must match it exactly."""
        from hermes_cli.web_server import _is_accepted_host
        bound = "my-server.corp.net"
        assert _is_accepted_host("my-server.corp.net", bound)
        assert _is_accepted_host("my-server.corp.net:9119", bound)
        # A different host is rejected.
        assert not _is_accepted_host("evil.example", bound)
        # Loopback is rejected too — we bound to a specific non-loopback name.
        assert not _is_accepted_host("localhost", bound)

    def test_case_insensitive_comparison(self):
        """Host headers are case-insensitive per RFC — accept variations."""
        from hermes_cli.web_server import _is_accepted_host
        for variant in ("LOCALHOST", "LocalHost:9119"):
            assert _is_accepted_host(variant, "127.0.0.1")
class TestHostHeaderMiddleware:
    """End-to-end test via the FastAPI app — verify the middleware
    rejects bad Host headers with 400."""

    def test_rebinding_request_rejected(self):
        from fastapi.testclient import TestClient
        from hermes_cli.web_server import app
        # Simulate start_server having set the bound_host
        app.state.bound_host = "127.0.0.1"
        try:
            client = TestClient(app)
            # The TestClient sends Host: testserver by default — which is
            # NOT a loopback alias, so the middleware must reject it.
            resp = client.get(
                "/api/status",
                headers={"Host": "evil.example"},
            )
            assert resp.status_code == 400
            assert "Invalid Host header" in resp.json()["detail"]
        finally:
            # Clean up so other tests don't inherit the bound_host
            if hasattr(app.state, "bound_host"):
                del app.state.bound_host

    def test_legit_loopback_request_accepted(self):
        from fastapi.testclient import TestClient
        from hermes_cli.web_server import app
        app.state.bound_host = "127.0.0.1"
        try:
            client = TestClient(app)
            # /api/status is in _PUBLIC_API_PATHS — passes auth — so the
            # only thing that can reject is the host header middleware
            resp = client.get(
                "/api/status",
                headers={"Host": "localhost:9119"},
            )
            # Either 200 (endpoint served) or some other non-400 —
            # just not the host-rejection 400
            assert resp.status_code != 400 or (
                "Invalid Host header" not in resp.json().get("detail", "")
            )
        finally:
            if hasattr(app.state, "bound_host"):
                del app.state.bound_host

    def test_no_bound_host_skips_validation(self):
        """If app.state.bound_host isn't set (e.g. running under test
        infra without calling start_server), middleware must pass through
        rather than crash."""
        from fastapi.testclient import TestClient
        from hermes_cli.web_server import app
        # Make sure bound_host isn't set
        if hasattr(app.state, "bound_host"):
            del app.state.bound_host
        client = TestClient(app)
        # NOTE(review): no Host override here, so TestClient's default
        # "testserver" host is exercised — confirms validation is skipped.
        resp = client.get("/api/status")
        # Should get through to the status endpoint, not a 400
        assert resp.status_code != 400

View file

@ -136,13 +136,15 @@ class TestXiaomiModelCatalog:
assert PROVIDER_TO_MODELS_DEV["xiaomi"] == "xiaomi"
def test_static_model_list_fallback(self):
"""Static _PROVIDER_MODELS fallback must exist for model picker."""
"""Static _PROVIDER_MODELS fallback must exist for model picker.
We only assert the provider key is present the specific model
names are data that changes with upstream releases and doesn't
belong in tests.
"""
from hermes_cli.models import _PROVIDER_MODELS
assert "xiaomi" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["xiaomi"]
assert "mimo-v2-pro" in models
assert "mimo-v2-omni" in models
assert "mimo-v2-flash" in models
assert len(_PROVIDER_MODELS["xiaomi"]) >= 1
def test_list_agentic_models_mock(self, monkeypatch):
"""When models.dev returns Xiaomi data, list_agentic_models should return models."""

View file

@ -118,6 +118,86 @@ class TestOpenAIWireFormatOnCustomProvider:
assert agent._anthropic_prompt_cache_policy() == (False, False)
class TestQwenAlibabaFamily:
    """Qwen on OpenCode/OpenCode-Go/Alibaba — needs cache_control even on OpenAI-wire.

    Upstream pi-mono #3392 / #3393 documented that these providers serve
    zero cache hits without Anthropic-style markers. Regression reported
    by community user (Qwen3.6 on opencode-go burning through
    subscription with no cache). Envelope layout, not native, because the
    wire format is OpenAI chat.completions.
    """

    def test_qwen_on_opencode_go_caches_with_envelope_layout(self):
        # _anthropic_prompt_cache_policy() returns (should_cache, native_layout).
        agent = _make_agent(
            provider="opencode-go",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="qwen3.6-plus",
        )
        should, native = agent._anthropic_prompt_cache_policy()
        assert should is True, "Qwen on opencode-go must cache"
        assert native is False, "opencode-go is OpenAI-wire; envelope layout"

    def test_qwen35_plus_on_opencode_go(self):
        # Older Qwen generation on the same provider — same policy.
        agent = _make_agent(
            provider="opencode-go",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="qwen3.5-plus",
        )
        assert agent._anthropic_prompt_cache_policy() == (True, False)

    def test_qwen_on_opencode_zen_caches(self):
        agent = _make_agent(
            provider="opencode",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="qwen3-coder-plus",
        )
        assert agent._anthropic_prompt_cache_policy() == (True, False)

    def test_qwen_on_direct_alibaba_caches(self):
        agent = _make_agent(
            provider="alibaba",
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
            api_mode="chat_completions",
            model="qwen3-coder",
        )
        assert agent._anthropic_prompt_cache_policy() == (True, False)

    def test_non_qwen_on_opencode_go_does_not_cache(self):
        # GLM / Kimi on opencode-go don't need markers (they have automatic
        # server-side caching or none at all).
        agent = _make_agent(
            provider="opencode-go",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="glm-5",
        )
        assert agent._anthropic_prompt_cache_policy() == (False, False)

    def test_kimi_on_opencode_go_does_not_cache(self):
        agent = _make_agent(
            provider="opencode-go",
            base_url="https://opencode.ai/v1",
            api_mode="chat_completions",
            model="kimi-k2.5",
        )
        assert agent._anthropic_prompt_cache_policy() == (False, False)

    def test_qwen_on_openrouter_not_affected(self):
        # Qwen via OpenRouter falls through — OpenRouter has its own
        # upstream caching arrangement for Qwen (provider-dependent).
        agent = _make_agent(
            provider="openrouter",
            base_url="https://openrouter.ai/api/v1",
            api_mode="chat_completions",
            model="qwen/qwen3-coder",
        )
        assert agent._anthropic_prompt_cache_policy() == (False, False)
class TestExplicitOverrides:
"""Policy accepts keyword overrides for switch_model / fallback activation."""

View file

@ -67,6 +67,14 @@ def test_get_proxy_from_env_ignores_blank_values(monkeypatch):
assert _get_proxy_from_env() == "http://real-proxy:8080"
def test_get_proxy_from_env_normalizes_socks_alias(monkeypatch):
    """A bare 'socks://' scheme is an alias that must be rewritten to
    'socks5://' before use."""
    proxy_vars = (
        "HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
        "https_proxy", "http_proxy", "all_proxy",
    )
    for name in proxy_vars:
        monkeypatch.delenv(name, raising=False)
    monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
    assert _get_proxy_from_env() == "socks5://127.0.0.1:1080/"
@patch("run_agent.OpenAI")
def test_create_openai_client_routes_via_proxy_when_env_set(mock_openai, monkeypatch):
"""With HTTPS_PROXY set, the custom httpx.Client must mount an HTTPProxy pool.

View file

@ -33,6 +33,11 @@ class TestInterruptPropagationToChild(unittest.TestCase):
agent._active_children = []
agent._active_children_lock = threading.Lock()
agent.quiet_mode = True
# Provider/model/base_url are read by stale-timeout resolution paths;
# the specific values don't matter for interrupt tests.
agent.provider = "openrouter"
agent.model = "test/model"
agent._base_url = "http://localhost:1234"
return agent
def test_parent_interrupt_sets_child_flag(self):

View file

@ -952,6 +952,84 @@ class TestBuildApiKwargs:
assert "temperature" not in kwargs
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
    """Kimi endpoint should send max_tokens=32000 and reasoning_effort as
    top-level params, matching Kimi CLI's default behavior."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["max_tokens"] == 32000
    assert api_kwargs["reasoning_effort"] == "medium"
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
    """reasoning_effort must reflect reasoning_config.effort when set."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    agent.reasoning_config = {"enabled": True, "effort": "high"}
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["reasoning_effort"] == "high"
def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
    """Kimi endpoint should send extra_body.thinking={"type":"enabled"}
    to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_kimi_coding_endpoint_disables_thinking(self, agent):
    """With reasoning_config.enabled=False, thinking is disabled and
    reasoning_effort is omitted entirely — mirroring Kimi CLI's
    with_thinking("off"), which maps to reasoning_effort=None."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    agent.reasoning_config = {"enabled": False}
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["extra_body"]["thinking"] == {"type": "disabled"}
    assert "reasoning_effort" not in api_kwargs
def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
    """api.moonshot.ai should get the same Kimi-compatible params."""
    agent.base_url = "https://api.moonshot.ai/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-k2.5"
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["max_tokens"] == 32000
    assert api_kwargs["reasoning_effort"] == "medium"
    assert api_kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
    """api.moonshot.cn (China endpoint) should get the same params."""
    agent.base_url = "https://api.moonshot.cn/v1"
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-k2.5"
    api_kwargs = agent._build_api_kwargs([{"role": "user", "content": "hi"}])
    assert api_kwargs["max_tokens"] == 32000
    assert api_kwargs["reasoning_effort"] == "medium"
    assert api_kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_provider_preferences_injected(self, agent):
agent.base_url = "https://openrouter.ai/api/v1"
agent.providers_allowed = ["Anthropic"]

203
tests/test_account_usage.py Normal file
View file

@ -0,0 +1,203 @@
from datetime import datetime, timezone
from agent.account_usage import (
AccountUsageSnapshot,
AccountUsageWindow,
fetch_account_usage,
render_account_usage_lines,
)
class _Response:
def __init__(self, payload, status_code=200):
self._payload = payload
self.status_code = status_code
def raise_for_status(self):
if self.status_code >= 400:
raise RuntimeError(f"HTTP {self.status_code}")
def json(self):
return self._payload
class _Client:
def __init__(self, payload):
self._payload = payload
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def get(self, url, headers=None):
return _Response(self._payload)
class _RoutingClient:
def __init__(self, payloads):
self._payloads = payloads
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def get(self, url, headers=None):
return _Response(self._payloads[url])
def test_fetch_account_usage_codex(monkeypatch):
    """openai-codex path: resolve credentials, read the cached account id,
    call the rate-limit endpoint, and map both windows plus the credits
    balance into the snapshot."""
    monkeypatch.setattr(
        "agent.account_usage.resolve_codex_runtime_credentials",
        lambda refresh_if_expiring=True: {
            "provider": "openai-codex",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "access-token",
        },
    )
    monkeypatch.setattr(
        "agent.account_usage._read_codex_tokens",
        lambda: {"tokens": {"account_id": "acct_123"}},
    )
    # _Client returns this payload for any GET — stands in for the
    # ChatGPT backend usage endpoint.
    monkeypatch.setattr(
        "agent.account_usage.httpx.Client",
        lambda timeout=15.0: _Client(
            {
                "plan_type": "pro",
                "rate_limit": {
                    "primary_window": {
                        "used_percent": 15,
                        "reset_at": 1_900_000_000,
                        "limit_window_seconds": 18000,
                    },
                    "secondary_window": {
                        "used_percent": 40,
                        "reset_at": 1_900_500_000,
                        "limit_window_seconds": 604800,
                    },
                },
                "credits": {"has_credits": True, "balance": 12.5},
            }
        ),
    )
    snapshot = fetch_account_usage("openai-codex")
    assert snapshot is not None
    assert snapshot.plan == "Pro"
    assert len(snapshot.windows) == 2
    # The primary window maps to the "Session" label and keeps its
    # percentage and UTC reset timestamp.
    assert snapshot.windows[0].label == "Session"
    assert snapshot.windows[0].used_percent == 15.0
    assert snapshot.windows[0].reset_at == datetime.fromtimestamp(1_900_000_000, tz=timezone.utc)
    assert "Credits balance: $12.50" in snapshot.details
def test_render_account_usage_lines_includes_reset_and_provider():
    """Rendered output shows the header, the provider+plan line, each
    window's remaining/used percentages, and any detail lines."""
    window = AccountUsageWindow(
        label="Session",
        used_percent=25,
        reset_at=datetime.now(timezone.utc),
    )
    snapshot = AccountUsageSnapshot(
        provider="openai-codex",
        source="usage_api",
        fetched_at=datetime.now(timezone.utc),
        plan="Pro",
        windows=(window,),
        details=("Credits balance: $9.99",),
    )
    rendered = render_account_usage_lines(snapshot)
    assert rendered[0] == "📈 Account limits"
    assert "openai-codex (Pro)" in rendered[1]
    assert "Session: 75% remaining (25% used)" in rendered[2]
    assert "Credits balance: $9.99" in rendered[3]
def test_fetch_account_usage_openrouter_uses_limit_remaining_and_ignores_deprecated_rate_limit(monkeypatch):
    """openrouter path: the quota window comes from /key limit_remaining,
    the balance from /credits, and the deprecated rate_limit blob
    (requests=-1) must never leak into the rendered output."""
    monkeypatch.setattr(
        "agent.account_usage.resolve_runtime_provider",
        lambda requested, explicit_base_url=None, explicit_api_key=None: {
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-test",
        },
    )
    # _RoutingClient serves different payloads for the /credits and /key
    # endpoints, matching the two calls fetch_account_usage makes.
    monkeypatch.setattr(
        "agent.account_usage.httpx.Client",
        lambda timeout=10.0: _RoutingClient(
            {
                "https://openrouter.ai/api/v1/credits": {
                    "data": {"total_credits": 300.0, "total_usage": 10.92}
                },
                "https://openrouter.ai/api/v1/key": {
                    "data": {
                        "limit": 100.0,
                        "limit_remaining": 70.0,
                        "limit_reset": "monthly",
                        "usage": 12.5,
                        "usage_daily": 0.5,
                        "usage_weekly": 2.0,
                        "usage_monthly": 8.0,
                        "rate_limit": {"requests": -1, "interval": "10s"},
                    }
                },
            }
        ),
    )
    snapshot = fetch_account_usage("openrouter")
    assert snapshot is not None
    # used_percent derives from (limit - limit_remaining) / limit → 30%.
    assert snapshot.windows == (
        AccountUsageWindow(
            label="API key quota",
            used_percent=30.0,
            detail="$70.00 of $100.00 remaining • resets monthly",
        ),
    )
    # Balance = total_credits - total_usage = 300.00 - 10.92.
    assert "Credits balance: $289.08" in snapshot.details
    assert "API key usage: $12.50 total • $0.50 today • $2.00 this week • $8.00 this month" in snapshot.details
    assert all("-1 requests / 10s" not in line for line in render_account_usage_lines(snapshot))
def test_fetch_account_usage_openrouter_omits_quota_window_when_key_has_no_limit(monkeypatch):
    """An unlimited key (limit / limit_remaining both None) produces no
    quota window at all — only balance and usage detail lines."""
    monkeypatch.setattr(
        "agent.account_usage.resolve_runtime_provider",
        lambda requested, explicit_base_url=None, explicit_api_key=None: {
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-test",
        },
    )
    monkeypatch.setattr(
        "agent.account_usage.httpx.Client",
        lambda timeout=10.0: _RoutingClient(
            {
                "https://openrouter.ai/api/v1/credits": {
                    "data": {"total_credits": 100.0, "total_usage": 25.5}
                },
                "https://openrouter.ai/api/v1/key": {
                    "data": {
                        "limit": None,
                        "limit_remaining": None,
                        "usage": 25.5,
                        "usage_daily": 1.25,
                        "usage_weekly": 4.5,
                        "usage_monthly": 18.0,
                    }
                },
            }
        ),
    )
    snapshot = fetch_account_usage("openrouter")
    assert snapshot is not None
    assert snapshot.windows == ()
    # Balance = total_credits - total_usage = 100.00 - 25.50.
    assert "Credits balance: $74.50" in snapshot.details
    assert "API key usage: $25.50 total • $1.25 today • $4.50 this week • $18.00 this month" in snapshot.details

View file

@ -106,3 +106,55 @@ class TestBaseUrlHostMatchesEdgeCases:
def test_trailing_dot_on_domain_stripped(self):
    # A trailing dot is the DNS fully-qualified form of the same name;
    # the matcher must normalize it away rather than fail the comparison.
    assert base_url_host_matches("https://openrouter.ai/v1", "openrouter.ai.") is True
class TestOllamaUrlHostCheck:
    """GHSA-76xc-57q6-vm5m — ollama.com credential selection previously used
    a raw substring match (same bug class as GHSA-xf8p-v2cg-h7h5 for
    OpenRouter). These tests lock in that the base_url_host_matches fix
    rejects the same attack vectors for Ollama.
    """

    def test_ollama_com_path_injection_rejected(self):
        """http://evil.test/ollama.com/v1 — ollama.com appears in the path,
        not the host. Must not be treated as Ollama Cloud."""
        verdict = base_url_host_matches("http://127.0.0.1:9000/ollama.com/v1", "ollama.com")
        assert verdict is False

    def test_ollama_com_subdomain_lookalike_rejected(self):
        """ollama.com.attacker.test is a separate host, not ollama.com."""
        verdict = base_url_host_matches("http://ollama.com.attacker.test:9000/v1", "ollama.com")
        assert verdict is False

    def test_ollama_com_localtest_me_rejected(self):
        """ollama.com.localtest.me resolves to 127.0.0.1 via localtest.me
        but its true hostname is localtest.me, not ollama.com."""
        verdict = base_url_host_matches("http://ollama.com.localtest.me:9000/v1", "ollama.com")
        assert verdict is False

    def test_ollama_ai_is_not_ollama_com(self):
        """Different TLD. ollama.ai is not ollama.com."""
        verdict = base_url_host_matches("https://ollama.ai/v1", "ollama.com")
        assert verdict is False

    def test_localhost_ollama_port_is_not_ollama_com(self):
        """http://localhost:11434/v1 is a local Ollama install, but its
        hostname is localhost, so OLLAMA_API_KEY (an ollama.com-only secret)
        must not be sent."""
        verdict = base_url_host_matches("http://localhost:11434/v1", "ollama.com")
        assert verdict is False

    def test_genuine_ollama_com_matches(self):
        verdict = base_url_host_matches("https://ollama.com/api/generate", "ollama.com")
        assert verdict is True

    def test_ollama_com_subdomain_matches(self):
        verdict = base_url_host_matches("https://api.ollama.com/v1", "ollama.com")
        assert verdict is True

View file

@ -161,6 +161,8 @@ def test_transform_tool_result_runs_after_post_tool_call(monkeypatch):
def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_path):
"""End-to-end: load a real plugin from HERMES_HOME and verify it rewrites results."""
import yaml
hermes_home = Path(os.environ["HERMES_HOME"])
plugins_dir = hermes_home / "plugins"
plugin_dir = plugins_dir / "transform_result_canon"
@ -172,7 +174,15 @@ def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_pat
'lambda **kw: f\'CANON[{kw["tool_name"]}]\' + kw["result"])\n',
encoding="utf-8",
)
# Plugins are opt-in — must be listed in plugins.enabled to load.
cfg_path = hermes_home / "config.yaml"
cfg_path.write_text(
yaml.safe_dump({"plugins": {"enabled": ["transform_result_canon"]}}),
encoding="utf-8",
)
# Force a fresh plugin manager so the new config is picked up.
plugins_mod._plugin_manager = plugins_mod.PluginManager()
plugins_mod.discover_plugins()
out = _run_handle_function_call(

View file

@ -58,10 +58,3 @@ class TestCamofoxConfigDefaults:
browser_cfg = DEFAULT_CONFIG["browser"]
assert browser_cfg["camofox"]["managed_persistence"] is False
def test_config_version_matches_current_schema(self):
from hermes_cli.config import DEFAULT_CONFIG
# The current schema version is tracked globally; unrelated default
# options may bump it after browser defaults are added.
assert DEFAULT_CONFIG["_config_version"] == 20

View file

@ -172,28 +172,60 @@ class TestTerminalIntegration:
assert blocked_var not in result
assert "PATH" in result
def test_passthrough_allows_blocklisted_var(self):
from tools.environments.local import _sanitize_subprocess_env, _HERMES_PROVIDER_ENV_BLOCKLIST
def test_passthrough_cannot_override_provider_blocklist(self):
"""GHSA-rhgp-j443-p4rf: register_env_passthrough must NOT accept
Hermes provider credentials that was the bypass where a skill
could declare ANTHROPIC_TOKEN / OPENAI_API_KEY as passthrough and
defeat the execute_code sandbox scrubbing."""
from tools.environments.local import (
_sanitize_subprocess_env,
_HERMES_PROVIDER_ENV_BLOCKLIST,
)
blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
# Attempt to register — must be silently refused (logged warning).
register_env_passthrough([blocked_var])
# is_env_passthrough must NOT report it as allowed
assert not is_env_passthrough(blocked_var)
# Sanitizer still strips the var from subprocess env
env = {blocked_var: "secret_value", "PATH": "/usr/bin"}
result = _sanitize_subprocess_env(env)
assert blocked_var in result
assert result[blocked_var] == "secret_value"
assert blocked_var not in result
assert "PATH" in result
def test_make_run_env_passthrough(self, monkeypatch):
from tools.environments.local import _make_run_env, _HERMES_PROVIDER_ENV_BLOCKLIST
def test_make_run_env_blocklist_override_rejected(self):
"""_make_run_env must NOT expose a blocklisted var to subprocess env
even after a skill attempts to register it via passthrough."""
import os
from tools.environments.local import (
_make_run_env,
_HERMES_PROVIDER_ENV_BLOCKLIST,
)
blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
monkeypatch.setenv(blocked_var, "secret_value")
os.environ[blocked_var] = "secret_value"
try:
# Without passthrough — blocked
result_before = _make_run_env({})
assert blocked_var not in result_before
# Without passthrough — blocked
result_before = _make_run_env({})
assert blocked_var not in result_before
# Skill tries to register it — must be refused, so still blocked
register_env_passthrough([blocked_var])
result_after = _make_run_env({})
assert blocked_var not in result_after
finally:
os.environ.pop(blocked_var, None)
# With passthrough — allowed
register_env_passthrough([blocked_var])
result_after = _make_run_env({})
assert blocked_var in result_after
def test_non_hermes_api_key_still_registerable(self):
"""Third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT
Hermes provider credentials and must still pass through skills
that legitimately wrap third-party APIs must keep working."""
# TENOR_API_KEY is a real example — used by the gif-search skill
register_env_passthrough(["TENOR_API_KEY"])
assert is_env_passthrough("TENOR_API_KEY")
# Arbitrary skill-specific var
register_env_passthrough(["MY_SKILL_CUSTOM_CONFIG"])
assert is_env_passthrough("MY_SKILL_CUSTOM_CONFIG")

View file

@ -230,3 +230,102 @@ class TestEscapeDriftGuard:
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
assert err is None
assert count == 1
class TestFindClosestLines:
    """Behavior of tools.fuzzy_match.find_closest_lines: fuzzy lookup of the
    content most similar to a missed old_string."""

    def setup_method(self):
        # Bind the helper once per test method.
        from tools.fuzzy_match import find_closest_lines

        self.find_closest_lines = find_closest_lines

    def test_finds_similar_line(self):
        haystack = "def foo():\n pass\ndef bar():\n return 1\n"
        hint = self.find_closest_lines("def baz():", haystack)
        assert "def foo" in hint or "def bar" in hint

    def test_returns_empty_for_no_match(self):
        haystack = "completely different content here"
        hint = self.find_closest_lines("xyzzy_no_match_possible_!!!", haystack)
        assert hint == ""

    def test_returns_empty_for_empty_inputs(self):
        # Either side empty → nothing to compare against.
        assert self.find_closest_lines("", "some content") == ""
        assert self.find_closest_lines("old string", "") == ""

    def test_includes_context_lines(self):
        haystack = "line1\nline2\ndef target():\n pass\nline5\n"
        hint = self.find_closest_lines("def target():", haystack)
        assert "target" in hint

    def test_includes_line_numbers(self):
        haystack = "line1\nline2\ndef foo():\n pass\n"
        hint = self.find_closest_lines("def foo():", haystack)
        # Returned lines are rendered in "N| content" form.
        assert "|" in hint
class TestFormatNoMatchHint:
    """Gating tests for format_no_match_hint — the shared helper that decides
    whether a 'Did you mean?' snippet should be appended to an error.
    """

    def setup_method(self):
        # Per the call sites below the signature is
        # (error_message, match_count, old_string, content).
        from tools.fuzzy_match import format_no_match_hint
        self.fmt = format_no_match_hint

    def test_fires_on_could_not_find_with_match(self):
        """Classic no-match: similar content exists → hint fires."""
        content = "def foo():\n pass\ndef bar():\n pass\n"
        result = self.fmt(
            "Could not find a match for old_string in the file",
            0, "def baz():", content,
        )
        assert "Did you mean" in result
        assert "foo" in result or "bar" in result

    def test_silent_on_ambiguous_match_error(self):
        """'Found N matches' is not a missing-match failure — no hint."""
        content = "aaa bbb aaa\n"
        result = self.fmt(
            "Found 2 matches for old_string. Provide more context to make it unique, or use replace_all=True.",
            0, "aaa", content,
        )
        assert result == ""

    def test_silent_on_escape_drift_error(self):
        """Escape-drift errors are intentional blocks — hint would mislead."""
        content = "x = 1\n"
        result = self.fmt(
            "Escape-drift detected: old_string and new_string contain the literal sequence '\\\\''...",
            0, "x = \\'1\\'", content,
        )
        assert result == ""

    def test_silent_on_identical_strings(self):
        """old_string == new_string — hint irrelevant."""
        result = self.fmt(
            "old_string and new_string are identical",
            0, "foo", "foo bar\n",
        )
        assert result == ""

    def test_silent_when_match_count_nonzero(self):
        """If match succeeded, we shouldn't be in the error path — defense in depth."""
        result = self.fmt(
            "Could not find a match for old_string in the file",
            1, "foo", "foo bar\n",
        )
        assert result == ""

    def test_silent_on_none_error(self):
        """No error at all — no hint."""
        result = self.fmt(None, 0, "foo", "bar\n")
        assert result == ""

    def test_silent_when_no_similar_content(self):
        """Even for a valid no-match error, skip hint when nothing similar exists."""
        result = self.fmt(
            "Could not find a match for old_string in the file",
            0, "totally_unique_xyzzy_qux", "abc\nxyz\n",
        )
        assert result == ""

View file

@ -0,0 +1,39 @@
"""FAL_KEY env var normalization (whitespace-only treated as unset)."""
def test_fal_key_whitespace_is_unset(monkeypatch):
    """A whitespace-only FAL_KEY must not register as configured; the managed
    gateway fallback is disabled so the check reflects the env var alone."""
    from tools import image_generation_tool

    monkeypatch.setattr(
        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
    )
    monkeypatch.setenv("FAL_KEY", " ")
    assert image_generation_tool.check_fal_api_key() is False
def test_fal_key_valid(monkeypatch):
    """A non-empty FAL_KEY registers as configured (gateway fallback off)."""
    from tools import image_generation_tool

    monkeypatch.setattr(
        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
    )
    monkeypatch.setenv("FAL_KEY", "sk-test")
    assert image_generation_tool.check_fal_api_key() is True
def test_fal_key_empty_is_unset(monkeypatch):
    """An empty-string FAL_KEY is treated as unset (gateway fallback off)."""
    from tools import image_generation_tool

    monkeypatch.setattr(
        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
    )
    monkeypatch.setenv("FAL_KEY", "")
    assert image_generation_tool.check_fal_api_key() is False

View file

@ -0,0 +1,162 @@
"""Tests for terminal.shell_init_files / terminal.auto_source_bashrc.
A bash ``-l -c`` invocation does NOT source ``~/.bashrc``, so tools that
register themselves there (nvm, asdf, pyenv) stay invisible to the
environment snapshot built by ``LocalEnvironment.init_session``. These
tests verify the config-driven prelude that fixes that.
"""
import os
from unittest.mock import patch
import pytest
from tools.environments.local import (
LocalEnvironment,
_prepend_shell_init,
_read_terminal_shell_init_config,
_resolve_shell_init_files,
)
class TestResolveShellInitFiles:
    """_resolve_shell_init_files: combines the configured explicit file list
    with the auto-source-bashrc default, returning only files that exist.
    The config reader is patched to return (explicit_list, auto_source_bashrc)."""

    def test_auto_sources_bashrc_when_present(self, tmp_path, monkeypatch):
        bashrc = tmp_path / ".bashrc"
        bashrc.write_text('export MARKER=seen\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        # Default config: auto_source_bashrc on, no explicit list.
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([], True),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == [str(bashrc)]

    def test_skips_bashrc_when_missing(self, tmp_path, monkeypatch):
        # No bashrc written.
        monkeypatch.setenv("HOME", str(tmp_path))
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([], True),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == []

    def test_auto_source_bashrc_off_suppresses_default(self, tmp_path, monkeypatch):
        bashrc = tmp_path / ".bashrc"
        bashrc.write_text('export MARKER=seen\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        # auto_source_bashrc=False: an existing ~/.bashrc must be ignored.
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([], False),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == []

    def test_explicit_list_wins_over_auto(self, tmp_path, monkeypatch):
        bashrc = tmp_path / ".bashrc"
        bashrc.write_text('export FROM_BASHRC=1\n')
        custom = tmp_path / "custom.sh"
        custom.write_text('export FROM_CUSTOM=1\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        # auto_source_bashrc stays True but the explicit list takes precedence.
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([str(custom)], True),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == [str(custom)]
        assert str(bashrc) not in resolved

    def test_expands_home_and_env_vars(self, tmp_path, monkeypatch):
        target = tmp_path / "rc" / "custom.sh"
        target.parent.mkdir()
        target.write_text('export A=1\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        monkeypatch.setenv("CUSTOM_RC_DIR", str(tmp_path / "rc"))
        # "~" expansion ...
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=(["~/rc/custom.sh"], False),
        ):
            resolved_home = _resolve_shell_init_files()
        # ... and "${VAR}" expansion both resolve to the same real file.
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=(["${CUSTOM_RC_DIR}/custom.sh"], False),
        ):
            resolved_var = _resolve_shell_init_files()
        assert resolved_home == [str(target)]
        assert resolved_var == [str(target)]

    def test_missing_explicit_files_are_skipped_silently(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HOME", str(tmp_path))
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([str(tmp_path / "does-not-exist.sh")], False),
        ):
            resolved = _resolve_shell_init_files()
        assert resolved == []
class TestPrependShellInit:
    """_prepend_shell_init: wraps a command string with guarded `source`
    lines for each resolved init file."""

    def test_empty_list_returns_command_unchanged(self):
        # No init files → command passes through untouched.
        assert _prepend_shell_init("echo hi", []) == "echo hi"

    def test_prepends_guarded_source_lines(self):
        wrapped = _prepend_shell_init("echo hi", ["/tmp/a.sh", "/tmp/b.sh"])
        assert "echo hi" in wrapped
        # Each file is sourced through a guarded [ -r … ] && . '…' || true
        # pattern so a missing/broken rc can't abort the bootstrap.
        assert "/tmp/a.sh" in wrapped
        assert "/tmp/b.sh" in wrapped
        assert "|| true" in wrapped
        assert "set +e" in wrapped

    def test_escapes_single_quotes(self):
        wrapped = _prepend_shell_init("echo hi", ["/tmp/o'malley.sh"])
        # The path must survive as the shell receives it; embedded single
        # quote is escaped as '\'' rather than breaking the outer quoting.
        assert "o'\\''malley" in wrapped
@pytest.mark.skipif(
    os.environ.get("CI") == "true" and not os.path.isfile("/bin/bash"),
    reason="Requires bash; CI sandbox may strip it.",
)
class TestSnapshotEndToEnd:
    """Spin up a real LocalEnvironment and confirm the snapshot sources
    extra init files."""

    def test_snapshot_picks_up_init_file_exports(self, tmp_path, monkeypatch):
        init_file = tmp_path / "custom-init.sh"
        init_file.write_text(
            'export HERMES_SHELL_INIT_PROBE="probe-ok"\n'
            'export PATH="/opt/shell-init-probe/bin:$PATH"\n'
        )
        # Config returns only our init file (auto_source_bashrc off).
        with patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([str(init_file)], False),
        ):
            env = LocalEnvironment(cwd=str(tmp_path), timeout=15)
            try:
                result = env.execute(
                    'echo "PROBE=$HERMES_SHELL_INIT_PROBE"; echo "PATH=$PATH"'
                )
            finally:
                # Always tear down the spawned shell session.
                env.cleanup()
        output = result.get("output", "")
        assert "PROBE=probe-ok" in output
        assert "/opt/shell-init-probe/bin" in output

View file

@ -0,0 +1,252 @@
"""Tests for MCP tool-handler circuit-breaker recovery.
The circuit breaker in ``tools/mcp_tool.py`` is intended to short-circuit
calls to an MCP server that has failed ``_CIRCUIT_BREAKER_THRESHOLD``
consecutive times, then *transition back to a usable state* once the
server has had time to recover (or an explicit reconnect succeeds).
The original implementation only had two states — closed and open — with
no mechanism to transition back to closed, so a tripped breaker stayed
tripped for the lifetime of the process. These tests lock in the
half-open / cooldown / reconnect-resets-breaker behavior that fixes
that.
"""
import json
from unittest.mock import MagicMock
import pytest
pytest.importorskip("mcp.client.auth.oauth2")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _install_stub_server(mcp_tool_module, name: str, call_tool_impl):
"""Install a fake MCP server in the module's registry.
``call_tool_impl`` is an async function stored at ``session.call_tool``
(it's what the tool handler invokes).
"""
server = MagicMock()
server.name = name
session = MagicMock()
session.call_tool = call_tool_impl
server.session = session
server._reconnect_event = MagicMock()
server._ready = MagicMock()
server._ready.is_set.return_value = True
mcp_tool_module._servers[name] = server
mcp_tool_module._server_error_counts.pop(name, None)
if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
mcp_tool_module._server_breaker_opened_at.pop(name, None)
return server
def _cleanup(mcp_tool_module, name: str) -> None:
mcp_tool_module._servers.pop(name, None)
mcp_tool_module._server_error_counts.pop(name, None)
if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
mcp_tool_module._server_breaker_opened_at.pop(name, None)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_circuit_breaker_half_opens_after_cooldown(monkeypatch, tmp_path):
    """After a tripped breaker's cooldown elapses, the *next* call must
    actually execute against the session (half-open probe). When the
    probe succeeds, the breaker resets to fully closed.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from tools import mcp_tool
    from tools.mcp_tool import _make_tool_handler

    # Counts how many times the (stubbed) session is actually invoked.
    call_count = {"n": 0}

    async def _call_tool_success(*a, **kw):
        call_count["n"] += 1
        result = MagicMock()
        result.isError = False
        block = MagicMock()
        block.text = "ok"
        result.content = [block]
        result.structuredContent = None
        return result

    _install_stub_server(mcp_tool, "srv", _call_tool_success)
    mcp_tool._ensure_mcp_loop()
    try:
        # Trip the breaker by setting the count at/above threshold and
        # stamping the open-time to "now".
        mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
        # Controllable clock: mcp_tool.time.monotonic reads fake_now[0].
        fake_now = [1000.0]

        def _fake_monotonic():
            return fake_now[0]

        monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic)
        # The breaker-open timestamp dict is introduced by the fix; on
        # a pre-fix build it won't exist, which will cause the test to
        # fail at the .get() inside the gate (correct — the fix is
        # required for this state to be tracked at all).
        if hasattr(mcp_tool, "_server_breaker_opened_at"):
            mcp_tool._server_breaker_opened_at["srv"] = fake_now[0]
        cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)
        # Handler returns a JSON-encoded string (parsed below).
        handler = _make_tool_handler("srv", "tool1", 10.0)
        # Before cooldown: must short-circuit (no session call).
        result = handler({})
        parsed = json.loads(result)
        assert "error" in parsed, parsed
        assert "unreachable" in parsed["error"].lower()
        assert call_count["n"] == 0, (
            "breaker should short-circuit before cooldown elapses"
        )
        # Advance past cooldown → next call is a half-open probe that
        # actually hits the session.
        fake_now[0] += cooldown + 1.0
        result = handler({})
        parsed = json.loads(result)
        assert parsed.get("result") == "ok", parsed
        assert call_count["n"] == 1, "half-open probe should invoke session"
        # On probe success the breaker must close (count reset to 0).
        assert mcp_tool._server_error_counts.get("srv", 0) == 0
    finally:
        _cleanup(mcp_tool, "srv")
def test_circuit_breaker_reopens_on_probe_failure(monkeypatch, tmp_path):
    """If the half-open probe fails, the breaker must re-arm the
    cooldown (not let every subsequent call through).
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from tools import mcp_tool
    from tools.mcp_tool import _make_tool_handler

    # Counts how many times the (stubbed) session is actually invoked.
    call_count = {"n": 0}

    async def _call_tool_fails(*a, **kw):
        call_count["n"] += 1
        raise RuntimeError("still broken")

    _install_stub_server(mcp_tool, "srv", _call_tool_fails)
    mcp_tool._ensure_mcp_loop()
    try:
        # Trip the breaker and stamp the open-time to the fake "now".
        mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
        fake_now = [1000.0]

        def _fake_monotonic():
            return fake_now[0]

        monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic)
        if hasattr(mcp_tool, "_server_breaker_opened_at"):
            mcp_tool._server_breaker_opened_at["srv"] = fake_now[0]
        cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)
        handler = _make_tool_handler("srv", "tool1", 10.0)
        # Advance past cooldown, run probe, expect failure.
        fake_now[0] += cooldown + 1.0
        result = handler({})
        parsed = json.loads(result)
        assert "error" in parsed
        assert call_count["n"] == 1, "probe should invoke session once"
        # The probe failure must have re-armed the cooldown — another
        # immediate call should short-circuit, not invoke session again.
        result = handler({})
        parsed = json.loads(result)
        assert "unreachable" in parsed.get("error", "").lower()
        assert call_count["n"] == 1, (
            "breaker should re-open and block further calls after probe failure"
        )
    finally:
        _cleanup(mcp_tool, "srv")
def test_circuit_breaker_cleared_on_reconnect(monkeypatch, tmp_path):
    """When the auth-recovery path successfully reconnects the server,
    the breaker should be cleared so subsequent calls aren't gated on a
    stale failure count even if the post-reconnect retry itself fails.

    This locks in the fix-#2 contract: a successful reconnect is
    sufficient evidence that the server is viable again. Under the old
    implementation, reset only happened on retry *success*, so a
    reconnect+retry-failure left the counter pinned above threshold
    forever.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from tools import mcp_tool
    from tools.mcp_oauth_manager import get_manager, reset_manager_for_tests
    from mcp.client.auth import OAuthFlowError

    reset_manager_for_tests()

    async def _call_tool_unused(*a, **kw):  # pragma: no cover
        raise AssertionError("session.call_tool should not be reached in this test")

    _install_stub_server(mcp_tool, "srv", _call_tool_unused)
    mcp_tool._ensure_mcp_loop()
    # Open the breaker well above threshold, with a recent open-time so
    # it would short-circuit everything without a reset.
    mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + 2
    if hasattr(mcp_tool, "_server_breaker_opened_at"):
        import time as _time
        mcp_tool._server_breaker_opened_at["srv"] = _time.monotonic()
    # Force handle_401 to claim recovery succeeded.
    mgr = get_manager()

    async def _h401(name, token=None):
        return True

    monkeypatch.setattr(mgr, "handle_401", _h401)
    try:
        # Retry fails *after* the successful reconnect. Under the old
        # implementation this bumps an already-tripped counter even
        # higher. Under fix #2 the reset happens on successful
        # reconnect, and the post-retry bump only raises the fresh
        # count to 1 — still below threshold.
        def _retry_call():
            raise OAuthFlowError("still failing post-reconnect")

        result = mcp_tool._handle_auth_error_and_retry(
            "srv",
            OAuthFlowError("initial"),
            _retry_call,
            "tools/call test",
        )
        # The call as a whole still surfaces needs_reauth because the
        # retry itself didn't succeed, but the breaker state must
        # reflect the successful reconnect.
        assert result is not None
        parsed = json.loads(result)
        assert parsed.get("needs_reauth") is True, parsed
        # Post-reconnect count was reset to 0, then the failing retry
        # bumped it to exactly 1 — well below threshold.
        count = mcp_tool._server_error_counts.get("srv", 0)
        assert count < mcp_tool._CIRCUIT_BREAKER_THRESHOLD, (
            f"successful reconnect must reset the breaker below threshold; "
            f"got count={count}, threshold={mcp_tool._CIRCUIT_BREAKER_THRESHOLD}"
        )
    finally:
        _cleanup(mcp_tool, "srv")

View file

@ -173,6 +173,8 @@ def test_terminal_output_transform_does_not_change_approval_or_exit_code_meaning
def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp_path):
import yaml
hermes_home = Path(os.environ["HERMES_HOME"])
plugins_dir = hermes_home / "plugins"
plugin_dir = plugins_dir / "terminal_transform"
@ -184,7 +186,15 @@ def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp
'lambda **kw: "PLUGIN-HEAD\\n" + kw["output"] + "\\nPLUGIN-TAIL")\n',
encoding="utf-8",
)
# Plugins are opt-in — must be listed in plugins.enabled to load.
cfg_path = hermes_home / "config.yaml"
cfg_path.write_text(
yaml.safe_dump({"plugins": {"enabled": ["terminal_transform"]}}),
encoding="utf-8",
)
# Force a fresh plugin manager so the new config is picked up.
plugins_mod._plugin_manager = plugins_mod.PluginManager()
plugins_mod.discover_plugins()
long_output = "X" * 60000

View file

@ -0,0 +1,198 @@
"""Tests for the KittenTTS local provider in tools/tts_tool.py."""
import json
from unittest.mock import MagicMock, patch
import numpy as np
import pytest
@pytest.fixture(autouse=True)
def clean_env(monkeypatch):
    """Strip session-platform env leakage so every test starts clean."""
    for leaked_key in ("HERMES_SESSION_PLATFORM",):
        monkeypatch.delenv(leaked_key, raising=False)
@pytest.fixture(autouse=True)
def clear_kittentts_cache():
    """Empty the module-level KittenTTS model cache before and after each test."""
    from tools import tts_tool as tts_module

    tts_module._kittentts_model_cache.clear()
    yield
    tts_module._kittentts_model_cache.clear()
@pytest.fixture
def mock_kittentts_module():
    """Inject a fake kittentts + soundfile module that return stub objects.

    Yields (fake model instance, fake model class) so tests can inspect
    generate()/constructor calls.
    """
    fake_model = MagicMock()
    # 24kHz float32 PCM at ~2s of silence
    fake_model.generate.return_value = np.zeros(48000, dtype=np.float32)
    fake_cls = MagicMock(return_value=fake_model)
    fake_kittentts = MagicMock()
    fake_kittentts.KittenTTS = fake_cls
    # Stub soundfile — the real package isn't installed in CI venv, and
    # _generate_kittentts does `import soundfile as sf` at runtime.
    fake_sf = MagicMock()

    def _fake_write(path, audio, samplerate):
        # Emulate writing a real file so downstream path checks succeed.
        import pathlib
        pathlib.Path(path).write_bytes(b"RIFF\x00\x00\x00\x00WAVEfmt fake")

    fake_sf.write = _fake_write
    # patch.dict restores sys.modules on exit, undoing the injection.
    with patch.dict(
        "sys.modules",
        {"kittentts": fake_kittentts, "soundfile": fake_sf},
    ):
        yield fake_model, fake_cls
class TestGenerateKittenTts:
    """_generate_kittentts: output writing, config plumbing, model caching,
    and the ffmpeg conversion path for non-WAV targets (all against the
    stubbed kittentts/soundfile fixture)."""

    def test_successful_wav_generation(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts
        fake_model, fake_cls = mock_kittentts_module
        output_path = str(tmp_path / "test.wav")
        result = _generate_kittentts("Hello world", output_path, {})
        # Returns the path it wrote, and the stubbed soundfile created the file.
        assert result == output_path
        assert (tmp_path / "test.wav").exists()
        fake_cls.assert_called_once()
        fake_model.generate.assert_called_once()

    def test_config_passes_voice_speed_cleantext(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts
        fake_model, _ = mock_kittentts_module
        config = {
            "kittentts": {
                "model": "KittenML/kitten-tts-mini-0.8",
                "voice": "Luna",
                "speed": 1.25,
                "clean_text": False,
            }
        }
        _generate_kittentts("Hi there", str(tmp_path / "out.wav"), config)
        # The kittentts sub-config must be forwarded as generate() kwargs.
        call_kwargs = fake_model.generate.call_args.kwargs
        assert call_kwargs["voice"] == "Luna"
        assert call_kwargs["speed"] == 1.25
        assert call_kwargs["clean_text"] is False

    def test_default_model_and_voice(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import (
            DEFAULT_KITTENTTS_MODEL,
            DEFAULT_KITTENTTS_VOICE,
            _generate_kittentts,
        )
        fake_model, fake_cls = mock_kittentts_module
        _generate_kittentts("Hi", str(tmp_path / "out.wav"), {})
        # Empty config falls back to the module-level defaults.
        fake_cls.assert_called_once_with(DEFAULT_KITTENTTS_MODEL)
        assert fake_model.generate.call_args.kwargs["voice"] == DEFAULT_KITTENTTS_VOICE

    def test_model_is_cached_across_calls(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts
        _, fake_cls = mock_kittentts_module
        _generate_kittentts("One", str(tmp_path / "a.wav"), {})
        _generate_kittentts("Two", str(tmp_path / "b.wav"), {})
        # Same model name → class instantiated exactly once
        assert fake_cls.call_count == 1

    def test_different_models_are_cached_separately(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts
        _, fake_cls = mock_kittentts_module
        _generate_kittentts(
            "A", str(tmp_path / "a.wav"),
            {"kittentts": {"model": "KittenML/kitten-tts-nano-0.8-int8"}},
        )
        _generate_kittentts(
            "B", str(tmp_path / "b.wav"),
            {"kittentts": {"model": "KittenML/kitten-tts-mini-0.8"}},
        )
        # Distinct model names each get their own cache entry.
        assert fake_cls.call_count == 2

    def test_non_wav_extension_triggers_ffmpeg_conversion(
        self, tmp_path, mock_kittentts_module, monkeypatch
    ):
        """Non-.wav output path causes WAV → target ffmpeg conversion."""
        from tools import tts_tool as _tt
        # Records every subprocess command issued.
        calls = []

        def fake_shutil_which(cmd):
            # Pretend ffmpeg is the only binary on PATH.
            return "/usr/bin/ffmpeg" if cmd == "ffmpeg" else None

        def fake_run(cmd, check=False, timeout=None, **kw):
            calls.append(cmd)
            # Emulate ffmpeg writing the output file
            import pathlib
            out_path = cmd[-1]
            pathlib.Path(out_path).write_bytes(b"fake-mp3-data")
            return MagicMock(returncode=0)

        monkeypatch.setattr(_tt.shutil, "which", fake_shutil_which)
        monkeypatch.setattr(_tt.subprocess, "run", fake_run)
        output_path = str(tmp_path / "test.mp3")
        result = _tt._generate_kittentts("Hi", output_path, {})
        assert result == output_path
        # Exactly one subprocess call, and it was ffmpeg.
        assert len(calls) == 1
        assert calls[0][0] == "/usr/bin/ffmpeg"

    def test_missing_kittentts_raises_import_error(self, tmp_path, monkeypatch):
        """When kittentts package is not installed, _import_kittentts raises."""
        import sys
        # sys.modules[name] = None makes a later `import name` fail.
        monkeypatch.setitem(sys.modules, "kittentts", None)
        from tools.tts_tool import _generate_kittentts
        with pytest.raises((ImportError, TypeError)):
            _generate_kittentts("Hi", str(tmp_path / "out.wav"), {})
class TestCheckKittenttsAvailable:
    """_check_kittentts_available reports whether the kittentts package is
    importable, as probed via importlib.util.find_spec."""

    def test_reports_available_when_package_present(self, monkeypatch):
        import importlib.util
        from tools.tts_tool import _check_kittentts_available

        stub_spec = MagicMock()

        def fake_find_spec(module_name):
            return stub_spec if module_name == "kittentts" else None

        monkeypatch.setattr(importlib.util, "find_spec", fake_find_spec)
        assert _check_kittentts_available() is True

    def test_reports_unavailable_when_package_missing(self, monkeypatch):
        import importlib.util
        from tools.tts_tool import _check_kittentts_available

        monkeypatch.setattr(importlib.util, "find_spec", lambda module_name: None)
        assert _check_kittentts_available() is False
class TestDispatcherBranch:
    def test_kittentts_not_installed_returns_helpful_error(self, monkeypatch, tmp_path):
        """When provider=kittentts but package missing, return JSON error with setup hint."""
        import sys
        import yaml

        monkeypatch.setitem(sys.modules, "kittentts", None)
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        from tools.tts_tool import text_to_speech_tool

        # Point the config at the kittentts provider.
        (tmp_path / "config.yaml").write_text(
            yaml.safe_dump({"tts": {"provider": "kittentts"}})
        )
        payload = json.loads(text_to_speech_tool(text="Hello"))
        assert payload["success"] is False
        error_text = payload["error"].lower()
        assert "kittentts" in error_text
        assert "hermes setup tts" in error_text

View file

@ -933,6 +933,58 @@ class TestEnableVoiceModeReal:
assert cli._voice_mode is True
class TestVoiceBeepConfigReal:
    """Tests the CLI voice beep toggle."""

    @patch("hermes_cli.config.load_config", return_value={"voice": {}})
    def test_beeps_enabled_by_default(self, _cfg):
        # No beep_enabled key in config → beeps default on.
        cli = _make_voice_cli()
        assert cli._voice_beeps_enabled() is True

    @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
    def test_beeps_can_be_disabled(self, _cfg):
        cli = _make_voice_cli()
        assert cli._voice_beeps_enabled() is False

    # Decorators apply bottom-up, so the mock arguments arrive in reverse
    # order: (_cfg, _req, mock_create, mock_beep, mock_thread, _cp).
    @patch("cli._cprint")
    @patch("cli.threading.Thread")
    @patch("tools.voice_mode.play_beep")
    @patch("tools.voice_mode.create_audio_recorder")
    @patch(
        "tools.voice_mode.check_voice_requirements",
        return_value={
            "available": True,
            "audio_available": True,
            "stt_available": True,
            "details": "OK",
            "missing_packages": [],
        },
    )
    @patch(
        "hermes_cli.config.load_config",
        return_value={
            "voice": {
                "beep_enabled": False,
                "silence_threshold": 200,
                "silence_duration": 3.0,
            }
        },
    )
    def test_start_recording_skips_beep_when_disabled(
        self, _cfg, _req, mock_create, mock_beep, mock_thread, _cp
    ):
        recorder = MagicMock()
        recorder.supports_silence_autostop = True
        mock_create.return_value = recorder
        mock_thread.return_value = MagicMock(start=MagicMock())
        cli = _make_voice_cli()
        cli._voice_start_recording()
        # Recording still starts; the disabled config suppresses only the beep.
        recorder.start.assert_called_once()
        mock_beep.assert_not_called()
class TestDisableVoiceModeReal:
"""Tests _disable_voice_mode with real CLI instance."""
@ -1087,6 +1139,16 @@ class TestVoiceStopAndTranscribeReal:
cli._voice_stop_and_transcribe()
assert cli._pending_input.empty()
    @patch("cli._cprint")
    @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
    @patch("tools.voice_mode.play_beep")
    def test_no_speech_detected_skips_beep_when_disabled(self, mock_beep, _cfg, _cp):
        # recorder.stop() returning None models "no speech captured".
        recorder = MagicMock()
        recorder.stop.return_value = None
        cli = _make_voice_cli(_voice_recording=True, _voice_recorder=recorder)
        cli._voice_stop_and_transcribe()
        # Even the no-speech feedback path must respect beep_enabled=False.
        mock_beep.assert_not_called()
@patch("cli._cprint")
@patch("cli.os.unlink")
@patch("cli.os.path.isfile", return_value=True)