From feddb86dbdaaa567d2e31457ea48884359ea4472 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 20 Apr 2026 23:05:38 -0700 Subject: [PATCH 01/63] fix(cli): dispatch /steer inline while agent is running (#13354) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Classic-CLI /steer typed during an active agent run was queued through self._pending_input alongside ordinary user input. process_loop, which drains that queue, is blocked inside self.chat() for the entire run, so the queued command was not pulled until AFTER _agent_running had flipped back to False — at which point process_command() took the idle fallback ("No agent running; queued as next turn") and delivered the steer as an ordinary next-turn user message. From Utku's bug report on PR #13205: mid-run /steer arrived minutes later at the end of the turn as a /queue-style message, completely defeating its purpose. Fix: add _should_handle_steer_command_inline() gating — when _agent_running is True and the user typed /steer, dispatch process_command(text) directly from the prompt_toolkit Enter handler on the UI thread instead of queueing. This mirrors the existing _should_handle_model_command_inline() pattern for /model and is safe because agent.steer() is thread-safe (uses _pending_steer_lock, no prompt_toolkit state mutation, instant return). No changes to the idle-path behavior: /steer typed with no active agent still takes the normal queue-and-drain route so the fallback "No agent running; queued as next turn" message is preserved. Validation: - 7 new unit tests in tests/cli/test_cli_steer_busy_path.py covering the detector, dispatch path, and idle-path control behavior. - All 21 existing tests in tests/run_agent/test_steer.py still pass. - Live PTY end-to-end test with real agent + real openrouter model: 22:36:22 API call #1 (model requested execute_code) 22:36:26 ENTER FIRED: agent_running=True, text='/steer ...' 22:36:26 INLINE STEER DISPATCH fired 22:36:43 agent.log: 'Delivered /steer to agent after tool batch' 22:36:44 API call #2 included the steer; response contained marker Same test on the tip of main without this fix shows the steer landing as a new user turn ~20s after the run ended. --- cli.py | 35 ++++++ tests/cli/test_cli_steer_busy_path.py | 146 ++++++++++++++++++++++++++ 2 files changed, 181 insertions(+) create mode 100644 tests/cli/test_cli_steer_busy_path.py diff --git a/cli.py b/cli.py index 68243946f4..4b315f9b61 100644 --- a/cli.py +++ b/cli.py @@ -5256,6 +5256,30 @@ class HermesCLI: except Exception: return False + def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool: + """Return True when /steer should be dispatched immediately while the agent is running. + + /steer MUST bypass the normal _pending_input → process_loop path when + the agent is active, because process_loop is blocked inside + self.chat() for the duration of the run. By the time the queued + command is pulled from _pending_input, _agent_running has already + flipped back to False, and process_command() takes the idle + fallback — delivering the steer as a next-turn message instead of + injecting it mid-run. Dispatching inline on the UI thread calls + agent.steer() directly, which is thread-safe (uses _pending_steer_lock). + """ + if not text or has_images or not _looks_like_slash_command(text): + return False + if not getattr(self, "_agent_running", False): + return False + try: + from hermes_cli.commands import resolve_command + base = text.split(None, 1)[0].lower().lstrip('/') + cmd = resolve_command(base) + return bool(cmd and cmd.name == "steer") + except Exception: + return False + def _show_model_and_providers(self): """Show current model + provider and list all authenticated providers. @@ -9068,6 +9092,17 @@ class HermesCLI: event.app.current_buffer.reset(append_to_history=True) return + # Handle /steer while the agent is running immediately on the + # UI thread. Queuing through _pending_input would deadlock the + # steer until after the agent loop finishes (process_loop is + # blocked inside self.chat()), which turns /steer into a + # post-run next-turn message — defeating mid-run injection. + # agent.steer() is thread-safe (holds _pending_steer_lock). + if self._should_handle_steer_command_inline(text, has_images=has_images): + self.process_command(text) + event.app.current_buffer.reset(append_to_history=True) + return + # Snapshot and clear attached images images = list(self._attached_images) self._attached_images.clear() diff --git a/tests/cli/test_cli_steer_busy_path.py b/tests/cli/test_cli_steer_busy_path.py new file mode 100644 index 0000000000..071c741fbe --- /dev/null +++ b/tests/cli/test_cli_steer_busy_path.py @@ -0,0 +1,146 @@ +"""Regression tests for classic-CLI mid-run /steer dispatch. + +Background +---------- +/steer sent while the agent is running used to be queued through +``self._pending_input`` alongside ordinary user input. ``process_loop`` +pulls from that queue and calls ``process_command()`` — but while the +agent is running, ``process_loop`` is blocked inside ``self.chat()``. +By the time the queued /steer was pulled, ``_agent_running`` had +already flipped back to False, so ``process_command()`` took the idle +fallback (``"No agent running; queued as next turn"``) and delivered +the steer as an ordinary next-turn message. + +The fix dispatches /steer inline on the UI thread when the agent is +running — matching the existing pattern for /model — so the steer +reaches ``agent.steer()`` (thread-safe) without touching the queue. + +These tests exercise the detector + inline dispatch without starting a +prompt_toolkit app. +""" + +from __future__ import annotations + +import importlib +import sys +from unittest.mock import MagicMock, patch + + +def _make_cli(): + """Create a HermesCLI instance with prompt_toolkit stubbed out.""" + _clean_config = { + "model": { + "default": "anthropic/claude-opus-4.6", + "base_url": "https://openrouter.ai/api/v1", + "provider": "auto", + }, + "display": {"compact": False, "tool_progress": "all"}, + "agent": {}, + "terminal": {"env_type": "local"}, + } + clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""} + prompt_toolkit_stubs = { + "prompt_toolkit": MagicMock(), + "prompt_toolkit.history": MagicMock(), + "prompt_toolkit.styles": MagicMock(), + "prompt_toolkit.patch_stdout": MagicMock(), + "prompt_toolkit.application": MagicMock(), + "prompt_toolkit.layout": MagicMock(), + "prompt_toolkit.layout.processors": MagicMock(), + "prompt_toolkit.filters": MagicMock(), + "prompt_toolkit.layout.dimension": MagicMock(), + "prompt_toolkit.layout.menus": MagicMock(), + "prompt_toolkit.widgets": MagicMock(), + "prompt_toolkit.key_binding": MagicMock(), + "prompt_toolkit.completion": MagicMock(), + "prompt_toolkit.formatted_text": MagicMock(), + "prompt_toolkit.auto_suggest": MagicMock(), + } + with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict( + "os.environ", clean_env, clear=False + ): + import cli as _cli_mod + + _cli_mod = importlib.reload(_cli_mod) + with patch.object(_cli_mod, "get_tool_definitions", return_value=[]), patch.dict( + _cli_mod.__dict__, {"CLI_CONFIG": _clean_config} + ): + return _cli_mod.HermesCLI() + + +class TestSteerInlineDetector: + """_should_handle_steer_command_inline gates the busy-path fast dispatch.""" + + def test_detects_steer_when_agent_running(self): + cli = _make_cli() + cli._agent_running = True + assert cli._should_handle_steer_command_inline("/steer focus on error handling") is True + + def test_ignores_steer_when_agent_idle(self): + """Idle-path /steer should fall through to the normal process_loop + dispatch so the queue-style fallback message is emitted.""" + cli = _make_cli() + cli._agent_running = False + assert cli._should_handle_steer_command_inline("/steer do something") is False + + def test_ignores_non_slash_input(self): + cli = _make_cli() + cli._agent_running = True + assert cli._should_handle_steer_command_inline("steer without slash") is False + assert cli._should_handle_steer_command_inline("") is False + + def test_ignores_other_slash_commands(self): + cli = _make_cli() + cli._agent_running = True + assert cli._should_handle_steer_command_inline("/queue hello") is False + assert cli._should_handle_steer_command_inline("/stop") is False + assert cli._should_handle_steer_command_inline("/help") is False + + def test_ignores_steer_with_attached_images(self): + """Image payloads take the normal path; steer doesn't accept images.""" + cli = _make_cli() + cli._agent_running = True + assert cli._should_handle_steer_command_inline("/steer text", has_images=True) is False + + +class TestSteerBusyPathDispatch: + """When the detector fires, process_command('/steer ...') must call + agent.steer() directly rather than the idle-path fallback.""" + + def test_process_command_routes_to_agent_steer(self): + """With _agent_running=True and agent.steer present, /steer reaches + agent.steer(payload), NOT _pending_input.""" + cli = _make_cli() + cli._agent_running = True + cli.agent = MagicMock() + cli.agent.steer = MagicMock(return_value=True) + # Make sure the idle-path fallback would be observable if taken + cli._pending_input = MagicMock() + + cli.process_command("/steer focus on errors") + + cli.agent.steer.assert_called_once_with("focus on errors") + cli._pending_input.put.assert_not_called() + + def test_idle_path_queues_as_next_turn(self): + """Control — when the agent is NOT running, /steer correctly falls + back to next-turn queue semantics. Demonstrates why the fix was + needed: the queue path only works when you can actually drain it.""" + cli = _make_cli() + cli._agent_running = False + cli.agent = MagicMock() + cli.agent.steer = MagicMock(return_value=True) + cli._pending_input = MagicMock() + + cli.process_command("/steer would-be-next-turn") + + # Idle path does NOT call agent.steer + cli.agent.steer.assert_not_called() + # It puts the payload in the queue as a normal next-turn message + cli._pending_input.put.assert_called_once_with("would-be-next-turn") + + +if __name__ == "__main__": # pragma: no cover + import pytest + + pytest.main([__file__, "-v"]) From 7ab5eebd0365b2cd69daa489f34a05847da65b2a Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Mon, 20 Apr 2026 20:13:33 +0530 Subject: [PATCH 02/63] feat: add transport types + migrate Anthropic normalize path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add agent/transports/types.py with three shared dataclasses: - NormalizedResponse: content, tool_calls, finish_reason, reasoning, usage, provider_data - ToolCall: id, name, arguments, provider_data (per-tool-call protocol metadata) - Usage: prompt_tokens, completion_tokens, total_tokens, cached_tokens Add normalize_anthropic_response_v2() to anthropic_adapter.py — wraps the existing v1 function and maps its output to NormalizedResponse. One call site in run_agent.py (the main normalize branch) uses v2 with a back-compat shim to SimpleNamespace for downstream code. No ABC, no registry, no streaming, no client lifecycle. Those land in PR 3 with the first concrete transport (AnthropicTransport). 46 new tests: - test_types.py: dataclass construction, build_tool_call, map_finish_reason - test_anthropic_normalize_v2.py: v1-vs-v2 regression tests (text, tools, thinking, mixed, stop reasons, mcp prefix stripping, edge cases) Part of the provider transport refactor (PR 2 of 9). --- agent/anthropic_adapter.py | 39 ++++ agent/transports/__init__.py | 1 + agent/transports/types.py | 100 +++++++++ run_agent.py | 27 ++- tests/agent/test_anthropic_normalize_v2.py | 238 +++++++++++++++++++++ tests/agent/transports/__init__.py | 0 tests/agent/transports/test_types.py | 151 +++++++++++++ 7 files changed, 554 insertions(+), 2 deletions(-) create mode 100644 agent/transports/__init__.py create mode 100644 agent/transports/types.py create mode 100644 tests/agent/test_anthropic_normalize_v2.py create mode 100644 tests/agent/transports/__init__.py create mode 100644 tests/agent/transports/test_types.py diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index bf2b8a62c5..d8d181cc10 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -1525,3 +1525,42 @@ def normalize_anthropic_response( ), finish_reason, ) + + +def normalize_anthropic_response_v2( + response, + strip_tool_prefix: bool = False, +) -> "NormalizedResponse": + """Normalize Anthropic response to NormalizedResponse. + + Wraps the existing normalize_anthropic_response() and maps its output + to the shared transport types. This allows incremental migration — + one call site at a time — without changing the original function. + """ + from agent.transports.types import NormalizedResponse, build_tool_call + + assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix) + + tool_calls = None + if assistant_msg.tool_calls: + tool_calls = [ + build_tool_call( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + ) + for tc in assistant_msg.tool_calls + ] + + provider_data = {} + if getattr(assistant_msg, "reasoning_details", None): + provider_data["reasoning_details"] = assistant_msg.reasoning_details + + return NormalizedResponse( + content=assistant_msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=getattr(assistant_msg, "reasoning", None), + usage=None, # Anthropic usage is on the raw response, not the normaliser + provider_data=provider_data or None, + ) diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py new file mode 100644 index 0000000000..6ee1c51174 --- /dev/null +++ b/agent/transports/__init__.py @@ -0,0 +1 @@ +"""Transport layer types for provider response normalization.""" diff --git a/agent/transports/types.py b/agent/transports/types.py new file mode 100644 index 0000000000..2b048fcaa4 --- /dev/null +++ b/agent/transports/types.py @@ -0,0 +1,100 @@ +"""Shared types for normalized provider responses. + +These dataclasses define the canonical shape that all provider adapters +normalize responses to. The shared surface is intentionally minimal — +only fields that every downstream consumer reads are top-level. +Protocol-specific state goes in ``provider_data`` dicts (response-level +and per-tool-call) so that protocol-aware code paths can access it +without polluting the shared type. +""" + +from __future__ import annotations + +import json +from dataclasses import dataclass, field +from typing import Any, Dict, List, Optional + + +@dataclass +class ToolCall: + """A normalized tool call from any provider. + + ``id`` is the protocol's canonical identifier — what gets used in + ``tool_call_id`` / ``tool_use_id`` when constructing tool result + messages. May be ``None`` when the provider omits it; the agent + fills it via ``_deterministic_call_id()`` before storing in history. + + ``provider_data`` carries per-tool-call protocol metadata that only + protocol-aware code reads: + + * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}`` + * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}`` + * Others: ``None`` + """ + + id: Optional[str] + name: str + arguments: str # JSON string + provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + + +@dataclass +class Usage: + """Token usage from an API response.""" + + prompt_tokens: int = 0 + completion_tokens: int = 0 + total_tokens: int = 0 + cached_tokens: int = 0 + + +@dataclass +class NormalizedResponse: + """Normalized API response from any provider. + + Shared fields are truly cross-provider — every caller can rely on + them without branching on api_mode. Protocol-specific state goes in + ``provider_data`` so that only protocol-aware code paths read it. + + Response-level ``provider_data`` examples: + + * Anthropic: ``{"reasoning_details": [...]}`` + * Codex: ``{"codex_reasoning_items": [...]}`` + * Others: ``None`` + """ + + content: Optional[str] + tool_calls: Optional[List[ToolCall]] + finish_reason: str # "stop", "tool_calls", "length", "content_filter" + reasoning: Optional[str] = None + usage: Optional[Usage] = None + provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + + +# --------------------------------------------------------------------------- +# Factory helpers +# --------------------------------------------------------------------------- + +def build_tool_call( + id: Optional[str], + name: str, + arguments: Any, + **provider_fields: Any, +) -> ToolCall: + """Build a ``ToolCall``, auto-serialising *arguments* if it's a dict. + + Any extra keyword arguments are collected into ``provider_data``. + """ + args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments) + pd = dict(provider_fields) if provider_fields else None + return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd) + + +def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str: + """Translate a provider-specific stop reason to the normalised set. + + Falls back to ``"stop"`` for unknown or ``None`` reasons. + """ + if reason is None: + return "stop" + return mapping.get(reason, "stop") diff --git a/run_agent.py b/run_agent.py index 49240d70f1..e69d30ff2c 100644 --- a/run_agent.py +++ b/run_agent.py @@ -10778,10 +10778,33 @@ class AIAgent: if self.api_mode == "codex_responses": assistant_message, finish_reason = self._normalize_codex_response(response) elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import normalize_anthropic_response - assistant_message, finish_reason = normalize_anthropic_response( + from agent.anthropic_adapter import normalize_anthropic_response_v2 + _nr = normalize_anthropic_response_v2( response, strip_tool_prefix=self._is_anthropic_oauth ) + # Back-compat shim: downstream code expects SimpleNamespace with + # .content, .tool_calls, .reasoning, .reasoning_content, + # .reasoning_details attributes. This shim makes the cost of the + # old interface visible — it vanishes when the full transport + # wiring lands (PR 3+). + assistant_message = SimpleNamespace( + content=_nr.content, + tool_calls=[ + SimpleNamespace( + id=tc.id, + type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) + for tc in (_nr.tool_calls or []) + ] or None, + reasoning=_nr.reasoning, + reasoning_content=None, + reasoning_details=( + _nr.provider_data.get("reasoning_details") + if _nr.provider_data else None + ), + ) + finish_reason = _nr.finish_reason else: assistant_message = response.choices[0].message diff --git a/tests/agent/test_anthropic_normalize_v2.py b/tests/agent/test_anthropic_normalize_v2.py new file mode 100644 index 0000000000..9d5c16139a --- /dev/null +++ b/tests/agent/test_anthropic_normalize_v2.py @@ -0,0 +1,238 @@ +"""Regression tests: normalize_anthropic_response_v2 vs v1. + +Constructs mock Anthropic responses and asserts that the v2 function +(returning NormalizedResponse) produces identical field values to the +original v1 function (returning SimpleNamespace + finish_reason). +""" + +import json +import pytest +from types import SimpleNamespace + +from agent.anthropic_adapter import ( + normalize_anthropic_response, + normalize_anthropic_response_v2, +) +from agent.transports.types import NormalizedResponse, ToolCall + + +# --------------------------------------------------------------------------- +# Helpers to build mock Anthropic SDK responses +# --------------------------------------------------------------------------- + +def _text_block(text: str): + return SimpleNamespace(type="text", text=text) + + +def _thinking_block(thinking: str, signature: str = "sig_abc"): + return SimpleNamespace(type="thinking", thinking=thinking, signature=signature) + + +def _tool_use_block(id: str, name: str, input: dict): + return SimpleNamespace(type="tool_use", id=id, name=name, input=input) + + +def _response(content_blocks, stop_reason="end_turn"): + return SimpleNamespace( + content=content_blocks, + stop_reason=stop_reason, + usage=SimpleNamespace( + input_tokens=10, + output_tokens=5, + ), + ) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + +class TestTextOnly: + """Text-only response — no tools, no thinking.""" + + def setup_method(self): + self.resp = _response([_text_block("Hello world")]) + self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp) + self.v2 = normalize_anthropic_response_v2(self.resp) + + def test_type(self): + assert isinstance(self.v2, NormalizedResponse) + + def test_content_matches(self): + assert self.v2.content == self.v1_msg.content + + def test_finish_reason_matches(self): + assert self.v2.finish_reason == self.v1_finish + + def test_no_tool_calls(self): + assert self.v2.tool_calls is None + assert self.v1_msg.tool_calls is None + + def test_no_reasoning(self): + assert self.v2.reasoning is None + assert self.v1_msg.reasoning is None + + +class TestWithToolCalls: + """Response with tool calls.""" + + def setup_method(self): + self.resp = _response( + [ + _text_block("I'll check that"), + _tool_use_block("toolu_abc", "terminal", {"command": "ls"}), + _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}), + ], + stop_reason="tool_use", + ) + self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp) + self.v2 = normalize_anthropic_response_v2(self.resp) + + def test_finish_reason(self): + assert self.v2.finish_reason == "tool_calls" + assert self.v1_finish == "tool_calls" + + def test_tool_call_count(self): + assert len(self.v2.tool_calls) == 2 + assert len(self.v1_msg.tool_calls) == 2 + + def test_tool_call_ids_match(self): + for i in range(2): + assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id + + def test_tool_call_names_match(self): + assert self.v2.tool_calls[0].name == "terminal" + assert self.v2.tool_calls[1].name == "read_file" + for i in range(2): + assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name + + def test_tool_call_arguments_match(self): + for i in range(2): + assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments + + def test_content_preserved(self): + assert self.v2.content == self.v1_msg.content + assert "check that" in self.v2.content + + +class TestWithThinking: + """Response with thinking blocks (Claude 3.5+ extended thinking).""" + + def setup_method(self): + self.resp = _response([ + _thinking_block("Let me think about this carefully..."), + _text_block("The answer is 42."), + ]) + self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp) + self.v2 = normalize_anthropic_response_v2(self.resp) + + def test_reasoning_matches(self): + assert self.v2.reasoning == self.v1_msg.reasoning + assert "think about this" in self.v2.reasoning + + def test_reasoning_details_in_provider_data(self): + v1_details = self.v1_msg.reasoning_details + v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data else None + assert v1_details is not None + assert v2_details is not None + assert len(v2_details) == len(v1_details) + + def test_content_excludes_thinking(self): + assert self.v2.content == "The answer is 42." + + +class TestMixed: + """Response with thinking + text + tool calls.""" + + def setup_method(self): + self.resp = _response( + [ + _thinking_block("Planning my approach..."), + _text_block("I'll run the command"), + _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}), + ], + stop_reason="tool_use", + ) + self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp) + self.v2 = normalize_anthropic_response_v2(self.resp) + + def test_all_fields_present(self): + assert self.v2.content is not None + assert self.v2.tool_calls is not None + assert self.v2.reasoning is not None + assert self.v2.finish_reason == "tool_calls" + + def test_content_matches(self): + assert self.v2.content == self.v1_msg.content + + def test_reasoning_matches(self): + assert self.v2.reasoning == self.v1_msg.reasoning + + def test_tool_call_matches(self): + assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id + assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name + + +class TestStopReasons: + """Verify finish_reason mapping matches between v1 and v2.""" + + @pytest.mark.parametrize("stop_reason,expected", [ + ("end_turn", "stop"), + ("tool_use", "tool_calls"), + ("max_tokens", "length"), + ("stop_sequence", "stop"), + ("refusal", "content_filter"), + ("model_context_window_exceeded", "length"), + ("unknown_future_reason", "stop"), + ]) + def test_stop_reason_mapping(self, stop_reason, expected): + resp = _response([_text_block("x")], stop_reason=stop_reason) + v1_msg, v1_finish = normalize_anthropic_response(resp) + v2 = normalize_anthropic_response_v2(resp) + assert v2.finish_reason == v1_finish == expected + + +class TestStripToolPrefix: + """Verify mcp_ prefix stripping works identically.""" + + def test_prefix_stripped(self): + resp = _response( + [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})], + stop_reason="tool_use", + ) + v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True) + v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True) + assert v1_msg.tool_calls[0].function.name == "terminal" + assert v2.tool_calls[0].name == "terminal" + + def test_prefix_kept(self): + resp = _response( + [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})], + stop_reason="tool_use", + ) + v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False) + v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False) + assert v1_msg.tool_calls[0].function.name == "mcp_terminal" + assert v2.tool_calls[0].name == "mcp_terminal" + + +class TestEdgeCases: + """Edge cases: empty content, no blocks, etc.""" + + def test_empty_content_blocks(self): + resp = _response([]) + v1_msg, v1_finish = normalize_anthropic_response(resp) + v2 = normalize_anthropic_response_v2(resp) + assert v2.content == v1_msg.content + assert v2.content is None + + def test_no_reasoning_details_means_none_provider_data(self): + resp = _response([_text_block("hi")]) + v2 = normalize_anthropic_response_v2(resp) + assert v2.provider_data is None + + def test_v2_returns_dataclass_not_namespace(self): + resp = _response([_text_block("hi")]) + v2 = normalize_anthropic_response_v2(resp) + assert isinstance(v2, NormalizedResponse) + assert not isinstance(v2, SimpleNamespace) diff --git a/tests/agent/transports/__init__.py b/tests/agent/transports/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/agent/transports/test_types.py b/tests/agent/transports/test_types.py new file mode 100644 index 0000000000..0be18c688c --- /dev/null +++ b/tests/agent/transports/test_types.py @@ -0,0 +1,151 @@ +"""Tests for agent/transports/types.py — dataclass construction + helpers.""" + +import json +import pytest + +from agent.transports.types import ( + NormalizedResponse, + ToolCall, + Usage, + build_tool_call, + map_finish_reason, +) + + +# --------------------------------------------------------------------------- +# ToolCall +# --------------------------------------------------------------------------- + +class TestToolCall: + def test_basic_construction(self): + tc = ToolCall(id="call_abc", name="terminal", arguments='{"cmd": "ls"}') + assert tc.id == "call_abc" + assert tc.name == "terminal" + assert tc.arguments == '{"cmd": "ls"}' + assert tc.provider_data is None + + def test_none_id(self): + tc = ToolCall(id=None, name="read_file", arguments="{}") + assert tc.id is None + + def test_provider_data(self): + tc = ToolCall( + id="call_x", + name="t", + arguments="{}", + provider_data={"call_id": "call_x", "response_item_id": "fc_x"}, + ) + assert tc.provider_data["call_id"] == "call_x" + assert tc.provider_data["response_item_id"] == "fc_x" + + +# --------------------------------------------------------------------------- +# Usage +# --------------------------------------------------------------------------- + +class TestUsage: + def test_defaults(self): + u = Usage() + assert u.prompt_tokens == 0 + assert u.completion_tokens == 0 + assert u.total_tokens == 0 + assert u.cached_tokens == 0 + + def test_explicit(self): + u = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150, cached_tokens=80) + assert u.total_tokens == 150 + + +# --------------------------------------------------------------------------- +# NormalizedResponse +# --------------------------------------------------------------------------- + +class TestNormalizedResponse: + def test_text_only(self): + r = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop") + assert r.content == "hello" + assert r.tool_calls is None + assert r.finish_reason == "stop" + assert r.reasoning is None + assert r.usage is None + assert r.provider_data is None + + def test_with_tool_calls(self): + tcs = [ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')] + r = NormalizedResponse(content=None, tool_calls=tcs, finish_reason="tool_calls") + assert r.finish_reason == "tool_calls" + assert len(r.tool_calls) == 1 + assert r.tool_calls[0].name == "terminal" + + def test_with_reasoning(self): + r = NormalizedResponse( + content="answer", + tool_calls=None, + finish_reason="stop", + reasoning="I thought about it", + ) + assert r.reasoning == "I thought about it" + + def test_with_provider_data(self): + r = NormalizedResponse( + content=None, + tool_calls=None, + finish_reason="stop", + provider_data={"reasoning_details": [{"type": "thinking", "thinking": "hmm"}]}, + ) + assert r.provider_data["reasoning_details"][0]["type"] == "thinking" + + +# --------------------------------------------------------------------------- +# build_tool_call +# --------------------------------------------------------------------------- + +class TestBuildToolCall: + def test_dict_arguments_serialized(self): + tc = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"}) + assert tc.arguments == json.dumps({"cmd": "ls"}) + assert tc.provider_data is None + + def test_string_arguments_passthrough(self): + tc = build_tool_call(id="call_2", name="read_file", arguments='{"path": "/tmp"}') + assert tc.arguments == '{"path": "/tmp"}' + + def test_provider_fields(self): + tc = build_tool_call( + id="call_3", + name="terminal", + arguments="{}", + call_id="call_3", + response_item_id="fc_3", + ) + assert tc.provider_data == {"call_id": "call_3", "response_item_id": "fc_3"} + + def test_none_id(self): + tc = build_tool_call(id=None, name="t", arguments="{}") + assert tc.id is None + + +# --------------------------------------------------------------------------- +# map_finish_reason +# --------------------------------------------------------------------------- + +class TestMapFinishReason: + ANTHROPIC_MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "refusal": "content_filter", + } + + def test_known_reason(self): + assert map_finish_reason("end_turn", self.ANTHROPIC_MAP) == "stop" + assert map_finish_reason("tool_use", self.ANTHROPIC_MAP) == "tool_calls" + assert map_finish_reason("max_tokens", self.ANTHROPIC_MAP) == "length" + assert map_finish_reason("refusal", self.ANTHROPIC_MAP) == "content_filter" + + def test_unknown_reason_defaults_to_stop(self): + assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop" + + def test_none_reason(self): + assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop" From 62cbeb63678e75f0975e936ad2a88c7913468176 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 20 Apr 2026 23:20:33 -0700 Subject: [PATCH 03/63] =?UTF-8?q?test:=20stop=20testing=20mutable=20data?= =?UTF-8?q?=20=E2=80=94=20convert=20change-detectors=20to=20invariants=20(?= =?UTF-8?q?#13363)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Catalog snapshots, config version literals, and enumeration counts are data that changes as designed. Tests that assert on those values add no behavioral coverage — they just break CI on every routine update and cost engineering time to 'fix.' Replace with invariants where one exists, delete where none does. Deleted (pure snapshots): - TestMinimaxModelCatalog (3 tests): 'MiniMax-M2.7 in models' et al - TestGeminiModelCatalog: 'gemini-2.5-pro in models', 'gemini-3.x in models' - test_browser_camofox_state::test_config_version_matches_current_schema (docstring literally said it would break on unrelated bumps) Relaxed (keep plumbing check, drop snapshot): - Xiaomi / Arcee / Kimi moonshot / Kimi coding / HuggingFace static lists: now assert 'provider exists and has >= 1 entry' instead of specific names - HuggingFace main/models.py consistency test: drop 'len >= 6' floor Dynamicized (follow source, not a literal): - 3x test_config.py migration tests: raw['_config_version'] == DEFAULT_CONFIG['_config_version'] instead of hardcoded 21 Fixed stale tests against intentional behavior changes: - test_insights::test_gateway_format_hides_cost: name matches new behavior (no dollar figures); remove contradicting '$' in text assertion - test_config::prefers_api_then_url_then_base_url: flipped per PR #9332; rename + update to base_url > url > api - test_anthropic_adapter: relax assert_called_once() (xdist-flaky) to assert called — contract is 'credential flowed through' - test_interrupt_propagation: add provider/model/_base_url to bare-agent fixture so the stale-timeout code path resolves Fixed stale integration tests against opt-in plugin gate: - transform_tool_result + transform_terminal_output: write plugins.enabled allow-list to config.yaml and reset the plugin manager singleton Source fix (real consistency invariant): - agent/model_metadata.py: add moonshotai/Kimi-K2.6 context length (262144, same as K2.5). test_model_metadata_has_context_lengths was correctly catching the gap. Policy: - AGENTS.md Testing section: new subsection 'Don't write change-detector tests' with do/don't examples. Reviewers should reject catalog-snapshot assertions in new tests. Covers every test that failed on the last completed main CI run (24703345583) except test_modal_sandbox_fixes::test_terminal_tool_present + test_terminal_and_file_toolsets_resolve_all_tools, which now pass both alone and with the full tests/tools/ directory (xdist ordering flake that resolved itself). --- AGENTS.md | 49 +++++++++++++++++++ agent/model_metadata.py | 1 + tests/agent/test_anthropic_adapter.py | 6 ++- tests/agent/test_insights.py | 5 +- tests/agent/test_minimax_provider.py | 32 ------------ tests/hermes_cli/test_api_key_providers.py | 18 +++---- tests/hermes_cli/test_arcee_provider.py | 8 +-- tests/hermes_cli/test_config.py | 14 ++++-- tests/hermes_cli/test_gemini_provider.py | 16 ++---- tests/hermes_cli/test_xiaomi_provider.py | 12 +++-- tests/run_agent/test_interrupt_propagation.py | 5 ++ tests/test_transform_tool_result_hook.py | 10 ++++ tests/tools/test_browser_camofox_state.py | 7 --- .../test_terminal_output_transform_hook.py | 10 ++++ 14 files changed, 113 insertions(+), 80 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 8bd979b058..0f5ce15f28 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -566,3 +566,52 @@ python -m pytest tests/ -q -n 4 Worker count above 4 will surface test-ordering flakes that CI never sees. Always run the full suite before pushing changes. + +### Don't write change-detector tests + +A test is a **change-detector** if it fails whenever data that is **expected +to change** gets updated — model catalogs, config version numbers, +enumeration counts, hardcoded lists of provider models. These tests add no +behavioral coverage; they just guarantee that routine source updates break +CI and cost engineering time to "fix." + +**Do not write:** + +```python +# catalog snapshot — breaks every model release +assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"] +assert "MiniMax-M2.7" in models + +# config version literal — breaks every schema bump +assert DEFAULT_CONFIG["_config_version"] == 21 + +# enumeration count — breaks every time a skill/provider is added +assert len(_PROVIDER_MODELS["huggingface"]) == 8 +``` + +**Do write:** + +```python +# behavior: does the catalog plumbing work at all? +assert "gemini" in _PROVIDER_MODELS +assert len(_PROVIDER_MODELS["gemini"]) >= 1 + +# behavior: does migration bump the user's version to current latest? +assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"] + +# invariant: no plan-only model leaks into the legacy list +assert not (set(moonshot_models) & coding_plan_only_models) + +# invariant: every model in the catalog has a context-length entry +for m in _PROVIDER_MODELS["huggingface"]: + assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER +``` + +The rule: if the test reads like a snapshot of current data, delete it. If +it reads like a contract about how two pieces of data must relate, keep it. +When a PR adds a new provider/model and you want a test, make the test +assert the relationship (e.g. "catalog entries all have context lengths"), +not the specific names. + +Reviewers should reject new change-detector tests; authors should convert +them into invariants before re-requesting review. diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 47f9bba94f..6506bffe6d 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -170,6 +170,7 @@ DEFAULT_CONTEXT_LENGTHS = { "Qwen/Qwen3.5-35B-A3B": 131072, "deepseek-ai/DeepSeek-V3.2": 65536, "moonshotai/Kimi-K2.5": 262144, + "moonshotai/Kimi-K2.6": 262144, "moonshotai/Kimi-K2-Thinking": 262144, "MiniMaxAI/MiniMax-M2.5": 204800, "XiaomiMiMo/MiMo-V2-Flash": 256000, diff --git a/tests/agent/test_anthropic_adapter.py b/tests/agent/test_anthropic_adapter.py index 737db01a35..b947a2df85 100644 --- a/tests/agent/test_anthropic_adapter.py +++ b/tests/agent/test_anthropic_adapter.py @@ -414,7 +414,11 @@ class TestRunOauthSetupToken: token = run_oauth_setup_token() assert token == "from-cred-file" - mock_run.assert_called_once() + # Don't assert exact call count — the contract is "credentials flow + # through", not "exactly one subprocess call". xdist cross-test + # pollution (other tests shimming subprocess via plugins) has flaked + # assert_called_once() in CI. + assert mock_run.called def test_returns_token_from_env_var(self, monkeypatch, tmp_path): """Falls back to CLAUDE_CODE_OAUTH_TOKEN env var when no cred files.""" diff --git a/tests/agent/test_insights.py b/tests/agent/test_insights.py index 4067c92157..2740daf096 100644 --- a/tests/agent/test_insights.py +++ b/tests/agent/test_insights.py @@ -516,13 +516,12 @@ class TestGatewayFormatting: assert "**" in text # Markdown bold def test_gateway_format_hides_cost(self, populated_db): + """Gateway format omits dollar figures and internal cache details.""" engine = InsightsEngine(populated_db) report = engine.generate(days=30) text = engine.format_gateway(report) - assert "$" in text - assert "Top Skills" in text - assert "Est. cost" in text + assert "$" not in text assert "cache" not in text.lower() def test_gateway_format_shows_models(self, populated_db): diff --git a/tests/agent/test_minimax_provider.py b/tests/agent/test_minimax_provider.py index 85c9c95206..4356b61c5a 100644 --- a/tests/agent/test_minimax_provider.py +++ b/tests/agent/test_minimax_provider.py @@ -84,38 +84,6 @@ class TestMinimaxAuxModel: assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] -class TestMinimaxModelCatalog: - """Verify the model catalog matches official Anthropic-compat endpoint models. - - Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api - """ - - def test_catalog_includes_current_models(self): - from hermes_cli.models import _PROVIDER_MODELS - for provider in ("minimax", "minimax-cn"): - models = _PROVIDER_MODELS[provider] - assert "MiniMax-M2.7" in models - assert "MiniMax-M2.5" in models - assert "MiniMax-M2.1" in models - assert "MiniMax-M2" in models - - def test_catalog_excludes_m1_family(self): - """M1 models are not available on the /anthropic endpoint.""" - from hermes_cli.models import _PROVIDER_MODELS - for provider in ("minimax", "minimax-cn"): - models = _PROVIDER_MODELS[provider] - assert "MiniMax-M1" not in models - - def test_catalog_excludes_highspeed(self): - """Highspeed variants are available but not shown in default catalog - (users can still specify them manually).""" - from hermes_cli.models import _PROVIDER_MODELS - for provider in ("minimax", "minimax-cn"): - models = _PROVIDER_MODELS[provider] - assert "MiniMax-M2.7-highspeed" not in models - assert "MiniMax-M2.5-highspeed" not in models - - class TestMinimaxBetaHeaders: """MiniMax Anthropic-compat endpoints reject fine-grained-tool-streaming beta. diff --git a/tests/hermes_cli/test_api_key_providers.py b/tests/hermes_cli/test_api_key_providers.py index c56edc4bb2..2af003ea08 100644 --- a/tests/hermes_cli/test_api_key_providers.py +++ b/tests/hermes_cli/test_api_key_providers.py @@ -921,17 +921,13 @@ class TestKimiMoonshotModelListIsolation: leaked = set(moonshot_models) & coding_plan_only assert not leaked, f"Moonshot list contains Coding Plan-only models: {leaked}" - def test_moonshot_list_contains_shared_models(self): + def test_moonshot_list_non_empty(self): from hermes_cli.main import _PROVIDER_MODELS - moonshot_models = _PROVIDER_MODELS["moonshot"] - assert "kimi-k2.5" in moonshot_models - assert "kimi-k2-thinking" in moonshot_models + assert len(_PROVIDER_MODELS["moonshot"]) >= 1 - def test_coding_plan_list_contains_plan_specific_models(self): + def test_coding_plan_list_non_empty(self): from hermes_cli.main import _PROVIDER_MODELS - coding_models = _PROVIDER_MODELS["kimi-coding"] - assert "kimi-for-coding" in coding_models - assert "kimi-k2-thinking-turbo" in coding_models + assert len(_PROVIDER_MODELS["kimi-coding"]) >= 1 # ============================================================================= @@ -944,14 +940,12 @@ class TestHuggingFaceModels: def test_main_provider_models_has_huggingface(self): from hermes_cli.main import _PROVIDER_MODELS assert "huggingface" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["huggingface"] - assert len(models) >= 6, "Expected at least 6 curated HF models" + assert len(_PROVIDER_MODELS["huggingface"]) >= 1 def test_models_py_has_huggingface(self): from hermes_cli.models import _PROVIDER_MODELS assert "huggingface" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["huggingface"] - assert len(models) >= 6 + assert len(_PROVIDER_MODELS["huggingface"]) >= 1 def test_model_lists_match(self): """Model lists in main.py and models.py should be identical.""" diff --git a/tests/hermes_cli/test_arcee_provider.py b/tests/hermes_cli/test_arcee_provider.py index 39b4e57876..e9eea77f93 100644 --- a/tests/hermes_cli/test_arcee_provider.py +++ b/tests/hermes_cli/test_arcee_provider.py @@ -115,12 +115,12 @@ class TestArceeCredentials: class TestArceeModelCatalog: def test_static_model_list(self): + """Arcee has a static _PROVIDER_MODELS catalog entry. Specific model + names change with releases and don't belong in tests. + """ from hermes_cli.models import _PROVIDER_MODELS assert "arcee" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["arcee"] - assert "trinity-large-thinking" in models - assert "trinity-large-preview" in models - assert "trinity-mini" in models + assert len(_PROVIDER_MODELS["arcee"]) >= 1 def test_canonical_provider_entry(self): from hermes_cli.models import CANONICAL_PROVIDERS diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py index 8c94902e68..5c719cbc21 100644 --- a/tests/hermes_cli/test_config.py +++ b/tests/hermes_cli/test_config.py @@ -459,7 +459,8 @@ class TestCustomProviderCompatibility: migrate_config(interactive=False, quiet=True) raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) - assert raw["_config_version"] == 21 + from hermes_cli.config import DEFAULT_CONFIG + assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"] assert raw["providers"]["openai-direct"] == { "api": "https://api.openai.com/v1", "api_key": "test-key", @@ -501,7 +502,8 @@ class TestCustomProviderCompatibility: assert compatible[0]["provider_key"] == "openai-direct" assert compatible[0]["api_mode"] == "codex_responses" - def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path): + def test_compatible_custom_providers_prefers_base_url_then_url_then_api(self, tmp_path): + """URL field precedence is base_url > url > api (PR #9332).""" config_path = tmp_path / "config.yaml" config_path.write_text( yaml.safe_dump( @@ -526,7 +528,7 @@ class TestCustomProviderCompatibility: assert compatible == [ { "name": "My Provider", - "base_url": "https://api.example.com/v1", + "base_url": "https://base.example.com/v1", "provider_key": "my-provider", } ] @@ -606,7 +608,8 @@ class TestInterimAssistantMessageConfig: migrate_config(interactive=False, quiet=True) raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) - assert raw["_config_version"] == 21 + from hermes_cli.config import DEFAULT_CONFIG + assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"] assert raw["display"]["tool_progress"] == "off" assert raw["display"]["interim_assistant_messages"] is True @@ -626,7 +629,8 @@ class TestDiscordChannelPromptsConfig: migrate_config(interactive=False, quiet=True) raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) - assert raw["_config_version"] == 21 + from hermes_cli.config import DEFAULT_CONFIG + assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"] assert raw["discord"]["auto_thread"] is True assert raw["discord"]["channel_prompts"] == {} diff --git a/tests/hermes_cli/test_gemini_provider.py b/tests/hermes_cli/test_gemini_provider.py index 7f9348be43..1daeb281f0 100644 --- a/tests/hermes_cli/test_gemini_provider.py +++ b/tests/hermes_cli/test_gemini_provider.py @@ -125,18 +125,12 @@ class TestGeminiCredentials: # ── Model Catalog ── class TestGeminiModelCatalog: - def test_provider_models_exist(self): + def test_provider_entry_exists(self): + """Gemini provider has a model catalog entry. Specific model names + are data that changes with Google releases and don't belong in tests. + """ assert "gemini" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["gemini"] - assert "gemini-2.5-pro" in models - assert "gemini-2.5-flash" in models - assert "gemma-4-31b-it" not in models - - def test_provider_models_has_3x(self): - models = _PROVIDER_MODELS["gemini"] - assert "gemini-3.1-pro-preview" in models - assert "gemini-3-flash-preview" in models - assert "gemini-3.1-flash-lite-preview" in models + assert len(_PROVIDER_MODELS["gemini"]) >= 1 def test_provider_label(self): assert "gemini" in _PROVIDER_LABELS diff --git a/tests/hermes_cli/test_xiaomi_provider.py b/tests/hermes_cli/test_xiaomi_provider.py index 57e5bdda85..f26740483c 100644 --- a/tests/hermes_cli/test_xiaomi_provider.py +++ b/tests/hermes_cli/test_xiaomi_provider.py @@ -136,13 +136,15 @@ class TestXiaomiModelCatalog: assert PROVIDER_TO_MODELS_DEV["xiaomi"] == "xiaomi" def test_static_model_list_fallback(self): - """Static _PROVIDER_MODELS fallback must exist for model picker.""" + """Static _PROVIDER_MODELS fallback must exist for model picker. + + We only assert the provider key is present — the specific model + names are data that changes with upstream releases and doesn't + belong in tests. + """ from hermes_cli.models import _PROVIDER_MODELS assert "xiaomi" in _PROVIDER_MODELS - models = _PROVIDER_MODELS["xiaomi"] - assert "mimo-v2-pro" in models - assert "mimo-v2-omni" in models - assert "mimo-v2-flash" in models + assert len(_PROVIDER_MODELS["xiaomi"]) >= 1 def test_list_agentic_models_mock(self, monkeypatch): """When models.dev returns Xiaomi data, list_agentic_models should return models.""" diff --git a/tests/run_agent/test_interrupt_propagation.py b/tests/run_agent/test_interrupt_propagation.py index ed1f21bfa1..9dd8ce327e 100644 --- a/tests/run_agent/test_interrupt_propagation.py +++ b/tests/run_agent/test_interrupt_propagation.py @@ -33,6 +33,11 @@ class TestInterruptPropagationToChild(unittest.TestCase): agent._active_children = [] agent._active_children_lock = threading.Lock() agent.quiet_mode = True + # Provider/model/base_url are read by stale-timeout resolution paths; + # the specific values don't matter for interrupt tests. + agent.provider = "openrouter" + agent.model = "test/model" + agent._base_url = "http://localhost:1234" return agent def test_parent_interrupt_sets_child_flag(self): diff --git a/tests/test_transform_tool_result_hook.py b/tests/test_transform_tool_result_hook.py index 159446fd57..508c0bdc0c 100644 --- a/tests/test_transform_tool_result_hook.py +++ b/tests/test_transform_tool_result_hook.py @@ -161,6 +161,8 @@ def test_transform_tool_result_runs_after_post_tool_call(monkeypatch): def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_path): """End-to-end: load a real plugin from HERMES_HOME and verify it rewrites results.""" + import yaml + hermes_home = Path(os.environ["HERMES_HOME"]) plugins_dir = hermes_home / "plugins" plugin_dir = plugins_dir / "transform_result_canon" @@ -172,7 +174,15 @@ def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_pat 'lambda **kw: f\'CANON[{kw["tool_name"]}]\' + kw["result"])\n', encoding="utf-8", ) + # Plugins are opt-in — must be listed in plugins.enabled to load. + cfg_path = hermes_home / "config.yaml" + cfg_path.write_text( + yaml.safe_dump({"plugins": {"enabled": ["transform_result_canon"]}}), + encoding="utf-8", + ) + # Force a fresh plugin manager so the new config is picked up. + plugins_mod._plugin_manager = plugins_mod.PluginManager() plugins_mod.discover_plugins() out = _run_handle_function_call( diff --git a/tests/tools/test_browser_camofox_state.py b/tests/tools/test_browser_camofox_state.py index f726dd777c..9ce3d13202 100644 --- a/tests/tools/test_browser_camofox_state.py +++ b/tests/tools/test_browser_camofox_state.py @@ -58,10 +58,3 @@ class TestCamofoxConfigDefaults: browser_cfg = DEFAULT_CONFIG["browser"] assert browser_cfg["camofox"]["managed_persistence"] is False - - def test_config_version_matches_current_schema(self): - from hermes_cli.config import DEFAULT_CONFIG - - # The current schema version is tracked globally; unrelated default - # options may bump it after browser defaults are added. - assert DEFAULT_CONFIG["_config_version"] == 20 diff --git a/tests/tools/test_terminal_output_transform_hook.py b/tests/tools/test_terminal_output_transform_hook.py index bdbdcc0f5d..ccba7f77c1 100644 --- a/tests/tools/test_terminal_output_transform_hook.py +++ b/tests/tools/test_terminal_output_transform_hook.py @@ -173,6 +173,8 @@ def test_terminal_output_transform_does_not_change_approval_or_exit_code_meaning def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp_path): + import yaml + hermes_home = Path(os.environ["HERMES_HOME"]) plugins_dir = hermes_home / "plugins" plugin_dir = plugins_dir / "terminal_transform" @@ -184,7 +186,15 @@ def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp 'lambda **kw: "PLUGIN-HEAD\\n" + kw["output"] + "\\nPLUGIN-TAIL")\n', encoding="utf-8", ) + # Plugins are opt-in — must be listed in plugins.enabled to load. + cfg_path = hermes_home / "config.yaml" + cfg_path.write_text( + yaml.safe_dump({"plugins": {"enabled": ["terminal_transform"]}}), + encoding="utf-8", + ) + # Force a fresh plugin manager so the new config is picked up. + plugins_mod._plugin_manager = plugins_mod.PluginManager() plugins_mod.discover_plugins() long_output = "X" * 60000 From 9c0fc0b4e82d83b30123f8df9beccc43ebac4dc6 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 00:12:44 -0700 Subject: [PATCH 04/63] fix(whatsapp): remove shadowing shutil import in cmd_whatsapp (#13364) The re-pair branch had a redundant 'import shutil' inside cmd_whatsapp, which made shutil a function-local throughout the whole scope. The earlier 'shutil.which("npm")' call at the dependency-install step then crashed with UnboundLocalError before control ever reached the local import. shutil is already imported at module level (line 48), so the local import was dead code anyway. Drop it. --- hermes_cli/main.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index f88c42ddaf..adac54fb4c 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1330,8 +1330,6 @@ def cmd_whatsapp(args): except (EOFError, KeyboardInterrupt): response = "n" if response.lower() in ("y", "yes"): - import shutil - shutil.rmtree(session_dir, ignore_errors=True) session_dir.mkdir(parents=True, exist_ok=True) print(" ✓ Session cleared") From b48ea41d27b755b7bd69f74cd6938a5a3d389112 Mon Sep 17 00:00:00 2001 From: helix4u <4317663+helix4u@users.noreply.github.com> Date: Mon, 20 Apr 2026 18:48:59 -0600 Subject: [PATCH 05/63] feat(voice): add cli beep toggle --- cli.py | 33 +++++++--- hermes_cli/config.py | 3 +- tests/tools/test_voice_cli_integration.py | 62 +++++++++++++++++++ .../docs/guides/use-voice-mode-with-hermes.md | 1 + website/docs/user-guide/configuration.md | 1 + .../docs/user-guide/features/voice-mode.md | 3 +- 6 files changed, 91 insertions(+), 12 deletions(-) diff --git a/cli.py b/cli.py index 4b315f9b61..18aeb27161 100644 --- a/cli.py +++ b/cli.py @@ -7414,11 +7414,12 @@ class HermesCLI: self._voice_stop_and_transcribe() # Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=880, count=1) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=880, count=1) + except Exception: + pass try: self._voice_recorder.start(on_silence_stop=_on_silence) @@ -7466,11 +7467,12 @@ class HermesCLI: wav_path = self._voice_recorder.stop() # Audio cue: double beep after stream stopped (no CoreAudio conflict) - try: - from tools.voice_mode import play_beep - play_beep(frequency=660, count=2) - except Exception: - pass + if self._voice_beeps_enabled(): + try: + from tools.voice_mode import play_beep + play_beep(frequency=660, count=2) + except Exception: + pass if wav_path is None: _cprint(f"{_DIM}No speech detected.{_RST}") @@ -7621,6 +7623,17 @@ class HermesCLI: _cprint(f"Unknown voice subcommand: {subcommand}") _cprint("Usage: /voice [on|off|tts|status]") + def _voice_beeps_enabled(self) -> bool: + """Return whether CLI voice mode should play record start/stop beeps.""" + try: + from hermes_cli.config import load_config + voice_cfg = load_config().get("voice", {}) + if isinstance(voice_cfg, dict): + return bool(voice_cfg.get("beep_enabled", True)) + except Exception: + pass + return True + def _enable_voice_mode(self): """Enable voice mode after checking requirements.""" if self._voice_mode: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 5f10f0de27..b1566a2a5a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -645,6 +645,7 @@ DEFAULT_CONFIG = { "record_key": "ctrl+b", "max_recording_seconds": 120, "auto_tts": False, + "beep_enabled": True, # Play record start/stop beeps in CLI voice mode "silence_threshold": 200, # RMS below this = silence (0-32767) "silence_duration": 3.0, # Seconds of silence before auto-stop }, @@ -849,7 +850,7 @@ DEFAULT_CONFIG = { }, # Config schema version - bump this when adding new required fields - "_config_version": 21, + "_config_version": 22, } # ============================================================================= diff --git a/tests/tools/test_voice_cli_integration.py b/tests/tools/test_voice_cli_integration.py index da500996a1..e7d8811e02 100644 --- a/tests/tools/test_voice_cli_integration.py +++ b/tests/tools/test_voice_cli_integration.py @@ -933,6 +933,58 @@ class TestEnableVoiceModeReal: assert cli._voice_mode is True +class TestVoiceBeepConfigReal: + """Tests the CLI voice beep toggle.""" + + @patch("hermes_cli.config.load_config", return_value={"voice": {}}) + def test_beeps_enabled_by_default(self, _cfg): + cli = _make_voice_cli() + assert cli._voice_beeps_enabled() is True + + @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}}) + def test_beeps_can_be_disabled(self, _cfg): + cli = _make_voice_cli() + assert cli._voice_beeps_enabled() is False + + @patch("cli._cprint") + @patch("cli.threading.Thread") + @patch("tools.voice_mode.play_beep") + @patch("tools.voice_mode.create_audio_recorder") + @patch( + "tools.voice_mode.check_voice_requirements", + return_value={ + "available": True, + "audio_available": True, + "stt_available": True, + "details": "OK", + "missing_packages": [], + }, + ) + @patch( + "hermes_cli.config.load_config", + return_value={ + "voice": { + "beep_enabled": False, + "silence_threshold": 200, + "silence_duration": 3.0, + } + }, + ) + def test_start_recording_skips_beep_when_disabled( + self, _cfg, _req, mock_create, mock_beep, mock_thread, _cp + ): + recorder = MagicMock() + recorder.supports_silence_autostop = True + mock_create.return_value = recorder + mock_thread.return_value = MagicMock(start=MagicMock()) + + cli = _make_voice_cli() + cli._voice_start_recording() + + recorder.start.assert_called_once() + mock_beep.assert_not_called() + + class TestDisableVoiceModeReal: """Tests _disable_voice_mode with real CLI instance.""" @@ -1087,6 +1139,16 @@ class TestVoiceStopAndTranscribeReal: cli._voice_stop_and_transcribe() assert cli._pending_input.empty() + @patch("cli._cprint") + @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}}) + @patch("tools.voice_mode.play_beep") + def test_no_speech_detected_skips_beep_when_disabled(self, mock_beep, _cfg, _cp): + recorder = MagicMock() + recorder.stop.return_value = None + cli = _make_voice_cli(_voice_recording=True, _voice_recorder=recorder) + cli._voice_stop_and_transcribe() + mock_beep.assert_not_called() + @patch("cli._cprint") @patch("cli.os.unlink") @patch("cli.os.path.isfile", return_value=True) diff --git a/website/docs/guides/use-voice-mode-with-hermes.md b/website/docs/guides/use-voice-mode-with-hermes.md index 42b3355595..d43c0a0182 100644 --- a/website/docs/guides/use-voice-mode-with-hermes.md +++ b/website/docs/guides/use-voice-mode-with-hermes.md @@ -164,6 +164,7 @@ voice: record_key: "ctrl+b" max_recording_seconds: 120 auto_tts: false + beep_enabled: true silence_threshold: 200 silence_duration: 3.0 diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 4eb0c56d95..c6afd83322 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -1049,6 +1049,7 @@ voice: record_key: "ctrl+b" # Push-to-talk key inside the CLI max_recording_seconds: 120 # Hard stop for long recordings auto_tts: false # Enable spoken replies automatically when /voice on + beep_enabled: true # Play record start/stop beeps in CLI voice mode silence_threshold: 200 # RMS threshold for speech detection silence_duration: 3.0 # Seconds of silence before auto-stop ``` diff --git a/website/docs/user-guide/features/voice-mode.md b/website/docs/user-guide/features/voice-mode.md index 2befd59e0f..b82718cf04 100644 --- a/website/docs/user-guide/features/voice-mode.md +++ b/website/docs/user-guide/features/voice-mode.md @@ -149,7 +149,7 @@ Two-stage algorithm detects when you've finished speaking: If no speech is detected at all for 15 seconds, recording stops automatically. -Both `silence_threshold` and `silence_duration` are configurable in `config.yaml`. +Both `silence_threshold` and `silence_duration` are configurable in `config.yaml`. You can also disable the record start/stop beeps with `voice.beep_enabled: false`. ### Streaming TTS @@ -383,6 +383,7 @@ voice: record_key: "ctrl+b" # Key to start/stop recording max_recording_seconds: 120 # Maximum recording length auto_tts: false # Auto-enable TTS when voice mode starts + beep_enabled: true # Play record start/stop beeps silence_threshold: 200 # RMS level (0-32767) below which counts as silence silence_duration: 3.0 # Seconds of silence before auto-stop From 328223576b4dc29cbbb48a2037a82d0b37e8ac47 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 00:39:19 -0700 Subject: [PATCH 06/63] feat(skills+terminal): make bundled skill scripts runnable out of the box (#13384) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(skills): inject absolute skill dir and expand ${HERMES_SKILL_DIR} templates When a skill loads, the activation message now exposes the absolute skill directory and substitutes ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in the SKILL.md body, so skills with bundled scripts can instruct the agent to run them by absolute path without an extra skill_view round-trip. Also adds opt-in inline-shell expansion: !`cmd` snippets in SKILL.md are pre-executed (with the skill directory as CWD) and their stdout is inlined into the message before the agent reads it. Off by default — enable via skills.inline_shell in config.yaml — because any snippet runs on the host without approval. Changes: - agent/skill_commands.py: template substitution, inline-shell expansion, absolute skill-dir header, supporting-files list now shows both relative and absolute forms. - hermes_cli/config.py: new skills.template_vars, skills.inline_shell, skills.inline_shell_timeout knobs. - tests/agent/test_skill_commands.py: coverage for header, both template tokens (present and missing session id), template_vars disable, inline-shell default-off, enabled, CWD, and timeout. - website/docs/developer-guide/creating-skills.md: documents the template tokens, the absolute-path header, and the opt-in inline shell with its security caveat. Validation: tests/agent/ 1591 passed (includes 9 new tests). E2E: loaded a real skill in an isolated HERMES_HOME; confirmed ${HERMES_SKILL_DIR} resolves to the absolute path, ${HERMES_SESSION_ID} resolves to the passed task_id, !`date` runs when opt-in is set, and stays literal when it isn't. * feat(terminal): source ~/.bashrc (and user-listed init files) into session snapshot bash login shells don't source ~/.bashrc, so tools that install themselves there — nvm, asdf, pyenv, cargo, custom PATH exports — stay invisible to the environment snapshot Hermes builds once per session. Under systemd or any context with a minimal parent env, that surfaces as 'node: command not found' in the terminal tool even though the binary is reachable from every interactive shell on the machine. Changes: - tools/environments/local.py: before the login-shell snapshot bootstrap runs, prepend guarded 'source ' lines for each resolved init file. Missing files are skipped, each source is wrapped with a '[ -r ... ] && . ... || true' guard so a broken rc can't abort the bootstrap. - hermes_cli/config.py: new terminal.shell_init_files (explicit list, supports ~ and ${VAR}) and terminal.auto_source_bashrc (default on) knobs. When shell_init_files is set it takes precedence; when it's empty and auto_source_bashrc is on, ~/.bashrc gets auto-sourced. - tests/tools/test_local_shell_init.py: 10 tests covering the resolver (auto-bashrc, missing file, explicit override, ~/${VAR} expansion, opt-out) and the prelude builder (quoting, guarded sourcing), plus a real-LocalEnvironment snapshot test that confirms exports in the init file land in subsequent commands' environment. - website/docs/reference/faq.md: documents the fix in Troubleshooting, including the zsh-user pattern of sourcing ~/.zshrc or nvm.sh directly via shell_init_files. Validation: 10/10 new tests pass; tests/tools/test_local_*.py 40/40 pass; tests/agent/ 1591/1591 pass; tests/hermes_cli/test_config.py 50/50 pass. E2E in an isolated HERMES_HOME: confirmed that a fake ~/.bashrc setting a marker var and PATH addition shows up in a real LocalEnvironment().execute() call, that auto_source_bashrc=false suppresses it, that an explicit shell_init_files entry wins over the auto default, and that a missing bashrc is silently skipped. --- agent/skill_commands.py | 137 ++++++++++++- hermes_cli/config.py | 34 ++++ tests/agent/test_skill_commands.py | 188 ++++++++++++++++++ tests/tools/test_local_shell_init.py | 162 +++++++++++++++ tools/environments/local.py | 81 ++++++++ .../docs/developer-guide/creating-skills.md | 39 ++++ website/docs/reference/faq.md | 27 +++ 7 files changed, 665 insertions(+), 3 deletions(-) create mode 100644 tests/tools/test_local_shell_init.py diff --git a/agent/skill_commands.py b/agent/skill_commands.py index 280105daca..a4345ca8c4 100644 --- a/agent/skill_commands.py +++ b/agent/skill_commands.py @@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like import json import logging import re +import subprocess from datetime import datetime from pathlib import Path from typing import Any, Dict, Optional @@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+") _SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]") _SKILL_MULTI_HYPHEN = re.compile(r"-{2,}") +# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md. +# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are +# left as-is so the user can debug them. +_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}") + +# Matches inline shell snippets like: !`date +%Y-%m-%d` +# Non-greedy, single-line only — no newlines inside the backticks. +_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`") + +# Cap inline-shell output so a runaway command can't blow out the context. +_INLINE_SHELL_MAX_OUTPUT = 4000 + + +def _load_skills_config() -> dict: + """Load the ``skills`` section of config.yaml (best-effort).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + skills_cfg = cfg.get("skills") + if isinstance(skills_cfg, dict): + return skills_cfg + except Exception: + logger.debug("Could not read skills config", exc_info=True) + return {} + + +def _substitute_template_vars( + content: str, + skill_dir: Path | None, + session_id: str | None, +) -> str: + """Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content. + + Only substitutes tokens for which a concrete value is available — + unresolved tokens are left in place so the author can spot them. + """ + if not content: + return content + + skill_dir_str = str(skill_dir) if skill_dir else None + + def _replace(match: re.Match) -> str: + token = match.group(1) + if token == "HERMES_SKILL_DIR" and skill_dir_str: + return skill_dir_str + if token == "HERMES_SESSION_ID" and session_id: + return str(session_id) + return match.group(0) + + return _SKILL_TEMPLATE_RE.sub(_replace, content) + + +def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str: + """Execute a single inline-shell snippet and return its stdout (trimmed). + + Failures return a short ``[inline-shell error: ...]`` marker instead of + raising, so one bad snippet can't wreck the whole skill message. + """ + try: + completed = subprocess.run( + ["bash", "-c", command], + cwd=str(cwd) if cwd else None, + capture_output=True, + text=True, + timeout=max(1, int(timeout)), + check=False, + ) + except subprocess.TimeoutExpired: + return f"[inline-shell timeout after {timeout}s: {command}]" + except FileNotFoundError: + return f"[inline-shell error: bash not found]" + except Exception as exc: + return f"[inline-shell error: {exc}]" + + output = (completed.stdout or "").rstrip("\n") + if not output and completed.stderr: + output = completed.stderr.rstrip("\n") + if len(output) > _INLINE_SHELL_MAX_OUTPUT: + output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]" + return output + + +def _expand_inline_shell( + content: str, + skill_dir: Path | None, + timeout: int, +) -> str: + """Replace every !`cmd` snippet in ``content`` with its stdout. + + Runs each snippet with the skill directory as CWD so relative paths in + the snippet work the way the author expects. + """ + if "!`" not in content: + return content + + def _replace(match: re.Match) -> str: + cmd = match.group(1).strip() + if not cmd: + return "" + return _run_inline_shell(cmd, skill_dir, timeout) + + return _INLINE_SHELL_RE.sub(_replace, content) + def build_plan_path( user_instruction: str = "", @@ -133,14 +238,36 @@ def _build_skill_message( activation_note: str, user_instruction: str = "", runtime_note: str = "", + session_id: str | None = None, ) -> str: """Format a loaded skill into a user/system message payload.""" from tools.skills_tool import SKILLS_DIR content = str(loaded_skill.get("content") or "") + # ── Template substitution and inline-shell expansion ── + # Done before anything else so downstream blocks (setup notes, + # supporting-file hints) see the expanded content. + skills_cfg = _load_skills_config() + if skills_cfg.get("template_vars", True): + content = _substitute_template_vars(content, skill_dir, session_id) + if skills_cfg.get("inline_shell", False): + timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10) + content = _expand_inline_shell(content, skill_dir, timeout) + parts = [activation_note, "", content.strip()] + # ── Inject the absolute skill directory so the agent can reference + # bundled scripts without an extra skill_view() round-trip. ── + if skill_dir: + parts.append("") + parts.append(f"[Skill directory: {skill_dir}]") + parts.append( + "Resolve any relative paths in this skill (e.g. `scripts/foo.js`, " + "`templates/config.yaml`) against that directory, then run them " + "with the terminal tool using the absolute path." + ) + # ── Inject resolved skill config values ── _inject_skill_config(loaded_skill, parts) @@ -188,11 +315,13 @@ def _build_skill_message( # Skill is from an external dir — use the skill name instead skill_view_target = skill_dir.name parts.append("") - parts.append("[This skill has supporting files you can load with the skill_view tool:]") + parts.append("[This skill has supporting files:]") for sf in supporting: - parts.append(f"- {sf}") + parts.append(f"- {sf} -> {skill_dir / sf}") parts.append( - f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="")' + f'\nLoad any of these with skill_view(name="{skill_view_target}", ' + f'file_path=""), or run scripts directly by absolute path ' + f"(e.g. `node {skill_dir}/scripts/foo.js`)." ) if user_instruction: @@ -332,6 +461,7 @@ def build_skill_invocation_message( activation_note, user_instruction=user_instruction, runtime_note=runtime_note, + session_id=task_id, ) @@ -370,6 +500,7 @@ def build_preloaded_skills_prompt( loaded_skill, skill_dir, activation_note, + session_id=task_id, ) ) loaded_names.append(skill_name) diff --git a/hermes_cli/config.py b/hermes_cli/config.py index b1566a2a5a..4ed7eaf8e4 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -387,6 +387,26 @@ DEFAULT_CONFIG = { # (terminal and execute_code). Skill-declared required_environment_variables # are passed through automatically; this list is for non-skill use cases. "env_passthrough": [], + # Extra files to source in the login shell when building the + # per-session environment snapshot. Use this when tools like nvm, + # pyenv, asdf, or custom PATH entries are registered by files that + # a bash login shell would skip — most commonly ``~/.bashrc`` + # (bash doesn't source bashrc in non-interactive login mode) or + # zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``. + # Paths support ``~`` / ``${VAR}``. Missing files are silently + # skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the + # snapshot shell is bash (this is the ``auto_source_bashrc`` + # behaviour — disable with that key if you want strict login-only + # semantics). + "shell_init_files": [], + # When true (default), Hermes sources ``~/.bashrc`` in the login + # shell used to build the environment snapshot. This captures + # PATH additions, shell functions, and aliases defined in the + # user's bashrc — which a plain ``bash -l -c`` would otherwise + # miss because bash skips bashrc in non-interactive login mode. + # Turn this off if you have a bashrc that misbehaves when sourced + # non-interactively (e.g. one that hard-exits on TTY checks). + "auto_source_bashrc": True, "docker_image": "nikolaik/python-nodejs:python3.11-nodejs20", "docker_forward_env": [], # Explicit environment variables to set inside Docker containers. @@ -704,6 +724,20 @@ DEFAULT_CONFIG = { # always goes to ~/.hermes/skills/. "skills": { "external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"] + # Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md + # content with the absolute skill directory and the active session id + # before the agent sees it. Lets skill authors reference bundled + # scripts without the agent having to join paths. + "template_vars": True, + # Pre-execute inline shell snippets written as !`cmd` in SKILL.md + # body. Their stdout is inlined into the skill message before the + # agent reads it, so skills can inject dynamic context (dates, git + # state, detected tool versions, …). Off by default because any + # content from the skill author runs on the host without approval; + # only enable for skill sources you trust. + "inline_shell": False, + # Timeout (seconds) for each !`cmd` snippet when inline_shell is on. + "inline_shell_timeout": 10, }, # Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth. diff --git a/tests/agent/test_skill_commands.py b/tests/agent/test_skill_commands.py index 57ac7d6b58..e399db619e 100644 --- a/tests/agent/test_skill_commands.py +++ b/tests/agent/test_skill_commands.py @@ -405,3 +405,191 @@ class TestPlanSkillHelpers: assert "Add a /plan command" in msg assert ".hermes/plans/plan.md" in msg assert "Runtime note:" in msg + + +class TestSkillDirectoryHeader: + """The activation message must expose the absolute skill directory and + explain how to resolve relative paths, so skills with bundled scripts + don't force the agent into a second ``skill_view()`` round-trip.""" + + def test_header_contains_absolute_skill_dir(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + skill_dir = _make_skill(tmp_path, "abs-dir-skill") + scan_skill_commands() + msg = build_skill_invocation_message("/abs-dir-skill", "go") + + assert msg is not None + assert f"[Skill directory: {skill_dir}]" in msg + assert "Resolve any relative paths" in msg + + def test_supporting_files_shown_with_absolute_paths(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + skill_dir = _make_skill(tmp_path, "scripted-skill") + (skill_dir / "scripts").mkdir() + (skill_dir / "scripts" / "run.js").write_text("console.log('hi')") + scan_skill_commands() + msg = build_skill_invocation_message("/scripted-skill") + + assert msg is not None + # The supporting-files block must emit both the relative form (so the + # agent can call skill_view on it) and the absolute form (so it can + # run the script directly via terminal). + assert "scripts/run.js" in msg + assert str(skill_dir / "scripts" / "run.js") in msg + assert f"node {skill_dir}/scripts/foo.js" in msg + + +class TestTemplateVarSubstitution: + """``${HERMES_SKILL_DIR}`` and ``${HERMES_SESSION_ID}`` in SKILL.md body + are replaced before the agent sees the content.""" + + def test_substitutes_skill_dir(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + skill_dir = _make_skill( + tmp_path, + "templated", + body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/templated") + + assert msg is not None + assert f"node {skill_dir}/scripts/foo.js" in msg + # The literal template token must not leak through. + assert "${HERMES_SKILL_DIR}" not in msg.split("[Skill directory:")[0] + + def test_substitutes_session_id_when_available(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_skill( + tmp_path, + "sess-templated", + body="Session: ${HERMES_SESSION_ID}", + ) + scan_skill_commands() + msg = build_skill_invocation_message( + "/sess-templated", task_id="abc-123" + ) + + assert msg is not None + assert "Session: abc-123" in msg + + def test_leaves_session_id_token_when_missing(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_skill( + tmp_path, + "sess-missing", + body="Session: ${HERMES_SESSION_ID}", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/sess-missing", task_id=None) + + assert msg is not None + # No session — token left intact so the author can spot it. + assert "Session: ${HERMES_SESSION_ID}" in msg + + def test_disable_template_vars_via_config(self, tmp_path): + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_commands._load_skills_config", + return_value={"template_vars": False}, + ), + ): + _make_skill( + tmp_path, + "no-sub", + body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/no-sub") + + assert msg is not None + # Template token must survive when substitution is disabled. + assert "${HERMES_SKILL_DIR}/scripts/foo.js" in msg + + +class TestInlineShellExpansion: + """Inline ``!`cmd`` snippets in SKILL.md run before the agent sees the + content — but only when the user has opted in via config.""" + + def test_inline_shell_is_off_by_default(self, tmp_path): + with patch("tools.skills_tool.SKILLS_DIR", tmp_path): + _make_skill( + tmp_path, + "dyn-default-off", + body="Today is !`echo INLINE_RAN`.", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/dyn-default-off") + + assert msg is not None + # Default config has inline_shell=False — snippet must stay literal. + assert "!`echo INLINE_RAN`" in msg + assert "Today is INLINE_RAN." not in msg + + def test_inline_shell_runs_when_enabled(self, tmp_path): + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_commands._load_skills_config", + return_value={"template_vars": True, "inline_shell": True, + "inline_shell_timeout": 5}, + ), + ): + _make_skill( + tmp_path, + "dyn-on", + body="Marker: !`echo INLINE_RAN`.", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/dyn-on") + + assert msg is not None + assert "Marker: INLINE_RAN." in msg + assert "!`echo INLINE_RAN`" not in msg + + def test_inline_shell_runs_in_skill_directory(self, tmp_path): + """Inline snippets get the skill dir as CWD so relative paths work.""" + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_commands._load_skills_config", + return_value={"template_vars": True, "inline_shell": True, + "inline_shell_timeout": 5}, + ), + ): + skill_dir = _make_skill( + tmp_path, + "dyn-cwd", + body="Here: !`pwd`", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/dyn-cwd") + + assert msg is not None + assert f"Here: {skill_dir}" in msg + + def test_inline_shell_timeout_does_not_break_message(self, tmp_path): + with ( + patch("tools.skills_tool.SKILLS_DIR", tmp_path), + patch( + "agent.skill_commands._load_skills_config", + return_value={"template_vars": True, "inline_shell": True, + "inline_shell_timeout": 1}, + ), + ): + _make_skill( + tmp_path, + "dyn-slow", + body="Slow: !`sleep 5 && printf DYN_MARKER`", + ) + scan_skill_commands() + msg = build_skill_invocation_message("/dyn-slow") + + assert msg is not None + # Timeout is surfaced as a marker instead of propagating as an error, + # and the rest of the skill message still renders. + assert "inline-shell timeout" in msg + # The command's intended stdout never made it through — only the + # timeout marker (which echoes the command text) survives. + assert "DYN_MARKER" not in msg.replace("sleep 5 && printf DYN_MARKER", "") diff --git a/tests/tools/test_local_shell_init.py b/tests/tools/test_local_shell_init.py new file mode 100644 index 0000000000..96e26e7357 --- /dev/null +++ b/tests/tools/test_local_shell_init.py @@ -0,0 +1,162 @@ +"""Tests for terminal.shell_init_files / terminal.auto_source_bashrc. + +A bash ``-l -c`` invocation does NOT source ``~/.bashrc``, so tools that +register themselves there (nvm, asdf, pyenv) stay invisible to the +environment snapshot built by ``LocalEnvironment.init_session``. These +tests verify the config-driven prelude that fixes that. +""" + +import os +from unittest.mock import patch + +import pytest + +from tools.environments.local import ( + LocalEnvironment, + _prepend_shell_init, + _read_terminal_shell_init_config, + _resolve_shell_init_files, +) + + +class TestResolveShellInitFiles: + def test_auto_sources_bashrc_when_present(self, tmp_path, monkeypatch): + bashrc = tmp_path / ".bashrc" + bashrc.write_text('export MARKER=seen\n') + monkeypatch.setenv("HOME", str(tmp_path)) + + # Default config: auto_source_bashrc on, no explicit list. + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([], True), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [str(bashrc)] + + def test_skips_bashrc_when_missing(self, tmp_path, monkeypatch): + # No bashrc written. + monkeypatch.setenv("HOME", str(tmp_path)) + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([], True), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [] + + def test_auto_source_bashrc_off_suppresses_default(self, tmp_path, monkeypatch): + bashrc = tmp_path / ".bashrc" + bashrc.write_text('export MARKER=seen\n') + monkeypatch.setenv("HOME", str(tmp_path)) + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([], False), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [] + + def test_explicit_list_wins_over_auto(self, tmp_path, monkeypatch): + bashrc = tmp_path / ".bashrc" + bashrc.write_text('export FROM_BASHRC=1\n') + custom = tmp_path / "custom.sh" + custom.write_text('export FROM_CUSTOM=1\n') + monkeypatch.setenv("HOME", str(tmp_path)) + + # auto_source_bashrc stays True but the explicit list takes precedence. + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([str(custom)], True), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [str(custom)] + assert str(bashrc) not in resolved + + def test_expands_home_and_env_vars(self, tmp_path, monkeypatch): + target = tmp_path / "rc" / "custom.sh" + target.parent.mkdir() + target.write_text('export A=1\n') + monkeypatch.setenv("HOME", str(tmp_path)) + monkeypatch.setenv("CUSTOM_RC_DIR", str(tmp_path / "rc")) + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=(["~/rc/custom.sh"], False), + ): + resolved_home = _resolve_shell_init_files() + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=(["${CUSTOM_RC_DIR}/custom.sh"], False), + ): + resolved_var = _resolve_shell_init_files() + + assert resolved_home == [str(target)] + assert resolved_var == [str(target)] + + def test_missing_explicit_files_are_skipped_silently(self, tmp_path, monkeypatch): + monkeypatch.setenv("HOME", str(tmp_path)) + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([str(tmp_path / "does-not-exist.sh")], False), + ): + resolved = _resolve_shell_init_files() + + assert resolved == [] + + +class TestPrependShellInit: + def test_empty_list_returns_command_unchanged(self): + assert _prepend_shell_init("echo hi", []) == "echo hi" + + def test_prepends_guarded_source_lines(self): + wrapped = _prepend_shell_init("echo hi", ["/tmp/a.sh", "/tmp/b.sh"]) + assert "echo hi" in wrapped + # Each file is sourced through a guarded [ -r … ] && . '…' || true + # pattern so a missing/broken rc can't abort the bootstrap. + assert "/tmp/a.sh" in wrapped + assert "/tmp/b.sh" in wrapped + assert "|| true" in wrapped + assert "set +e" in wrapped + + def test_escapes_single_quotes(self): + wrapped = _prepend_shell_init("echo hi", ["/tmp/o'malley.sh"]) + # The path must survive as the shell receives it; embedded single + # quote is escaped as '\'' rather than breaking the outer quoting. + assert "o'\\''malley" in wrapped + + +@pytest.mark.skipif( + os.environ.get("CI") == "true" and not os.path.isfile("/bin/bash"), + reason="Requires bash; CI sandbox may strip it.", +) +class TestSnapshotEndToEnd: + """Spin up a real LocalEnvironment and confirm the snapshot sources + extra init files.""" + + def test_snapshot_picks_up_init_file_exports(self, tmp_path, monkeypatch): + init_file = tmp_path / "custom-init.sh" + init_file.write_text( + 'export HERMES_SHELL_INIT_PROBE="probe-ok"\n' + 'export PATH="/opt/shell-init-probe/bin:$PATH"\n' + ) + + with patch( + "tools.environments.local._read_terminal_shell_init_config", + return_value=([str(init_file)], False), + ): + env = LocalEnvironment(cwd=str(tmp_path), timeout=15) + try: + result = env.execute( + 'echo "PROBE=$HERMES_SHELL_INIT_PROBE"; echo "PATH=$PATH"' + ) + finally: + env.cleanup() + + output = result.get("output", "") + assert "PROBE=probe-ok" in output + assert "/opt/shell-init-probe/bin" in output diff --git a/tools/environments/local.py b/tools/environments/local.py index a1ab676d30..06fd66a2d0 100644 --- a/tools/environments/local.py +++ b/tools/environments/local.py @@ -213,6 +213,77 @@ def _make_run_env(env: dict) -> dict: return run_env +def _read_terminal_shell_init_config() -> tuple[list[str], bool]: + """Return (shell_init_files, auto_source_bashrc) from config.yaml. + + Best-effort — returns sensible defaults on any failure so terminal + execution never breaks because the config file is unreadable. + """ + try: + from hermes_cli.config import load_config + + cfg = load_config() or {} + terminal_cfg = cfg.get("terminal") or {} + files = terminal_cfg.get("shell_init_files") or [] + if not isinstance(files, list): + files = [] + auto_bashrc = bool(terminal_cfg.get("auto_source_bashrc", True)) + return [str(f) for f in files if f], auto_bashrc + except Exception: + return [], True + + +def _resolve_shell_init_files() -> list[str]: + """Resolve the list of files to source before the login-shell snapshot. + + Expands ``~`` and ``${VAR}`` references and drops anything that doesn't + exist on disk, so a missing ``~/.bashrc`` never breaks the snapshot. + The ``auto_source_bashrc`` path runs only when the user hasn't supplied + an explicit list — once they have, Hermes trusts them. + """ + explicit, auto_bashrc = _read_terminal_shell_init_config() + + candidates: list[str] = [] + if explicit: + candidates.extend(explicit) + elif auto_bashrc and not _IS_WINDOWS: + # Bash's login-shell invocation does NOT source ~/.bashrc by default, + # so tools like nvm / asdf / pyenv that self-install there stay + # invisible to the snapshot without this nudge. + candidates.append("~/.bashrc") + + resolved: list[str] = [] + for raw in candidates: + try: + path = os.path.expandvars(os.path.expanduser(raw)) + except Exception: + continue + if path and os.path.isfile(path): + resolved.append(path) + return resolved + + +def _prepend_shell_init(cmd_string: str, files: list[str]) -> str: + """Prepend ``source `` lines (guarded + silent) to a bash script. + + Each file is wrapped so a failing rc file doesn't abort the whole + bootstrap: ``set +e`` keeps going on errors, ``2>/dev/null`` hides + noisy prompts, and ``|| true`` neutralises the exit status. + """ + if not files: + return cmd_string + + prelude_parts = ["set +e"] + for path in files: + # shlex.quote isn't available here without an import; the files list + # comes from os.path.expanduser output so it's a concrete absolute + # path. Escape single quotes defensively anyway. + safe = path.replace("'", "'\\''") + prelude_parts.append(f"[ -r '{safe}' ] && . '{safe}' 2>/dev/null || true") + prelude = "\n".join(prelude_parts) + "\n" + return prelude + cmd_string + + class LocalEnvironment(BaseEnvironment): """Run commands directly on the host machine. @@ -255,6 +326,16 @@ class LocalEnvironment(BaseEnvironment): timeout: int = 120, stdin_data: str | None = None) -> subprocess.Popen: bash = _find_bash() + # For login-shell invocations (used by init_session to build the + # environment snapshot), prepend sources for the user's bashrc / + # custom init files so tools registered outside bash_profile + # (nvm, asdf, pyenv, …) end up on PATH in the captured snapshot. + # Non-login invocations are already sourcing the snapshot and + # don't need this. + if login: + init_files = _resolve_shell_init_files() + if init_files: + cmd_string = _prepend_shell_init(cmd_string, init_files) args = [bash, "-l", "-c", cmd_string] if login else [bash, "-c", cmd_string] run_env = _make_run_env(self.env) diff --git a/website/docs/developer-guide/creating-skills.md b/website/docs/developer-guide/creating-skills.md index 9fdb7fd115..43f088a9a3 100644 --- a/website/docs/developer-guide/creating-skills.md +++ b/website/docs/developer-guide/creating-skills.md @@ -272,6 +272,45 @@ Put the most common workflow first. Edge cases and advanced usage go at the bott For XML/JSON parsing or complex logic, include helper scripts in `scripts/` — don't expect the LLM to write parsers inline every time. +#### Referencing bundled scripts from SKILL.md + +When a skill is loaded, the activation message exposes the absolute skill directory as `[Skill directory: /abs/path]` and also substitutes two template tokens anywhere in the SKILL.md body: + +| Token | Replaced with | +|---|---| +| `${HERMES_SKILL_DIR}` | Absolute path to the skill's directory | +| `${HERMES_SESSION_ID}` | The active session id (left in place if there is no session) | + +So a SKILL.md can tell the agent to run a bundled script directly with: + +```markdown +To analyse the input, run: + + node ${HERMES_SKILL_DIR}/scripts/analyse.js +``` + +The agent sees the substituted absolute path and invokes the `terminal` tool with a ready-to-run command — no path math, no extra `skill_view` round-trip. Disable substitution globally with `skills.template_vars: false` in `config.yaml`. + +#### Inline shell snippets (opt-in) + +Skills can also embed inline shell snippets written as `` !`cmd` `` in the SKILL.md body. When enabled, each snippet's stdout is inlined into the message before the agent reads it, so skills can inject dynamic context: + +```markdown +Current date: !`date -u +%Y-%m-%d` +Git branch: !`git -C ${HERMES_SKILL_DIR} rev-parse --abbrev-ref HEAD` +``` + +This is **off by default** — any snippet in a SKILL.md runs on the host without approval, so only enable it for skill sources you trust: + +```yaml +# config.yaml +skills: + inline_shell: true + inline_shell_timeout: 10 # seconds per snippet +``` + +Snippets run with the skill directory as their working directory, and output is capped at 4000 characters. Failures (timeouts, non-zero exits) show up as a short `[inline-shell error: ...]` marker instead of breaking the whole skill. + ### Test It Run the skill and verify the agent follows the instructions correctly: diff --git a/website/docs/reference/faq.md b/website/docs/reference/faq.md index 132a4d00a9..8a8b9df414 100644 --- a/website/docs/reference/faq.md +++ b/website/docs/reference/faq.md @@ -160,6 +160,33 @@ brew install python@3.12 # macOS The installer handles this automatically — if you see this error during manual installation, upgrade Python first. +#### Terminal commands say `node: command not found` (or `nvm`, `pyenv`, `asdf`, …) + +**Cause:** Hermes builds a per-session environment snapshot by running `bash -l` once at startup. A bash login shell reads `/etc/profile`, `~/.bash_profile`, and `~/.profile`, but **does not source `~/.bashrc`** — so tools that install themselves there (`nvm`, `asdf`, `pyenv`, `cargo`, custom `PATH` exports) stay invisible to the snapshot. This most commonly happens when Hermes runs under systemd or in a minimal shell where nothing has pre-loaded the interactive shell profile. + +**Solution:** Hermes auto-sources `~/.bashrc` by default. If that's not enough — e.g. you're a zsh user whose PATH lives in `~/.zshrc`, or you init `nvm` from a standalone file — list the extra files to source in `~/.hermes/config.yaml`: + +```yaml +terminal: + shell_init_files: + - ~/.zshrc # zsh users: pulls zsh-managed PATH into the bash snapshot + - ~/.nvm/nvm.sh # direct nvm init (works regardless of shell) + - /etc/profile.d/cargo.sh # system-wide rc files + # When this list is set, the default ~/.bashrc auto-source is NOT added — + # include it explicitly if you want both: + # - ~/.bashrc + # - ~/.zshrc +``` + +Missing files are skipped silently. Sourcing happens in bash, so files that rely on zsh-only syntax may error — if that's a concern, source just the PATH-setting portion (e.g. nvm's `nvm.sh` directly) rather than the whole rc file. + +To disable the auto-source behaviour (strict login-shell semantics only): + +```yaml +terminal: + auto_source_bashrc: false +``` + #### `uv: command not found` **Cause:** The `uv` package manager isn't installed or not in PATH. From cbe29db774ac933f0c2fe07d500ad5f73316b7f9 Mon Sep 17 00:00:00 2001 From: opriz Date: Sat, 18 Apr 2026 02:24:35 +0800 Subject: [PATCH 07/63] fix(gateway): prevent --replace race condition causing multiple instances When starting the gateway with --replace, concurrent invocations could leave multiple instances running simultaneously. This happened because write_pid_file() used a plain overwrite, so the second racer would silently replace the first process's PID record. Changes: - gateway/status.py: write_pid_file() now uses atomic O_CREAT|O_EXCL creation. If the file already exists, it raises FileExistsError, allowing exactly one process to win the race. - gateway/run.py: before writing the PID file, re-check get_running_pid() and catch FileExistsError from write_pid_file(). In both cases, stop the runner and return False so the process exits cleanly. Fixes #11718 --- gateway/run.py | 21 +++++++++++++++++++-- gateway/status.py | 24 ++++++++++++++++++++++-- scripts/release.py | 1 + 3 files changed, 42 insertions(+), 4 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 6ce409ff1b..d3ee8d4a01 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -10956,8 +10956,25 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = # Write PID file so CLI can detect gateway is running import atexit - from gateway.status import write_pid_file, remove_pid_file - write_pid_file() + from gateway.status import write_pid_file, remove_pid_file, get_running_pid + # Defensive re-check: another --replace racer may have started + # while we were initializing. If so, yield and exit. + _current_pid = get_running_pid() + if _current_pid is not None and _current_pid != os.getpid(): + logger.error( + "Another gateway instance (PID %d) started during our startup. " + "Exiting to avoid double-running.", _current_pid + ) + await runner.stop() + return False + try: + write_pid_file() + except FileExistsError: + logger.error( + "PID file race lost to another gateway instance. Exiting." + ) + await runner.stop() + return False atexit.register(remove_pid_file) # Start background cron ticker so scheduled jobs fire automatically. diff --git a/gateway/status.py b/gateway/status.py index e1598e1797..74763332c8 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -225,8 +225,28 @@ def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None: def write_pid_file() -> None: - """Write the current process PID and metadata to the gateway PID file.""" - _write_json_file(_get_pid_path(), _build_pid_record()) + """Write the current process PID and metadata to the gateway PID file. + + Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace + invocations race: exactly one process wins and the rest get + FileExistsError. + """ + path = _get_pid_path() + path.parent.mkdir(parents=True, exist_ok=True) + record = json.dumps(_build_pid_record()) + try: + fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY) + except FileExistsError: + raise # Let caller decide: another gateway is racing us + try: + with os.fdopen(fd, "w", encoding="utf-8") as f: + f.write(record) + except Exception: + try: + path.unlink(missing_ok=True) + except OSError: + pass + raise def write_runtime_status( diff --git a/scripts/release.py b/scripts/release.py index 1a5a1ea8ad..efe32f2364 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -307,6 +307,7 @@ AUTHOR_MAP = { "anthhub@163.com": "anthhub", "shenuu@gmail.com": "shenuu", "xiayh17@gmail.com": "xiayh0107", + "zhujianxyz@gmail.com": "opriz", "asurla@nvidia.com": "anniesurla", "limkuan24@gmail.com": "WideLee", "aviralarora002@gmail.com": "AviArora02-commits", From 56b99e823950cebeef0da6f23bbe9db6e02f3655 Mon Sep 17 00:00:00 2001 From: opriz Date: Sat, 18 Apr 2026 13:55:03 +0800 Subject: [PATCH 08/63] fix(gateway): force-unlink stale PID file after --replace takeover If the old process crashed without firing its atexit handler, remove_pid_file() is a no-op. Force-unlink the stale gateway.pid so write_pid_file() (O_CREAT|O_EXCL) does not hit FileExistsError. --- gateway/run.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/gateway/run.py b/gateway/run.py index d3ee8d4a01..4bb85ea7d6 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -10807,6 +10807,12 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = except (ProcessLookupError, PermissionError, OSError): pass remove_pid_file() + # remove_pid_file() is a no-op when the PID doesn't match. + # Force-unlink to cover the old-process-crashed case. + try: + (get_hermes_home() / "gateway.pid").unlink(missing_ok=True) + except Exception: + pass # Clean up any takeover marker the old process didn't consume # (e.g. SIGKILL'd before its shutdown handler could read it). try: From ce9c91c8f77db1860bdf57142c5c4469702fb7fe Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 00:36:25 -0700 Subject: [PATCH 09/63] fix(gateway): close --replace race completely by claiming PID before adapter startup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up on top of opriz's atomic PID file fix. The prior change caught the race AFTER runner.start(), so the loser still opened Telegram polling and Discord gateway sockets before detecting the conflict and exiting. Hoist the PID-claim block to BEFORE runner.start(). Now the loser of the O_CREAT|O_EXCL race returns from start_gateway() without ever bringing up any platform adapter — no Telegram conflict, no Discord duplicate session. Also add regression tests: - test_write_pid_file_is_atomic_against_concurrent_writers: second write_pid_file() raises FileExistsError rather than clobbering. - Two existing replace-path tests updated to stateful mocks since the real post-kill state (get_running_pid None after remove_pid_file) is now exercised by the hoisted re-check. --- gateway/run.py | 47 ++++++++++--------- tests/gateway/test_runner_startup_failures.py | 20 ++++++-- tests/gateway/test_status.py | 24 ++++++++++ 3 files changed, 64 insertions(+), 27 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index 4bb85ea7d6..bd034854d7 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -10951,6 +10951,30 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = else: logger.info("Skipping signal handlers (not running in main thread).") + # Claim the PID file BEFORE bringing up any platform adapters. + # This closes the --replace race window: two concurrent `gateway run + # --replace` invocations both pass the termination-wait above, but + # only the winner of the O_CREAT|O_EXCL race below will ever open + # Telegram polling, Discord gateway sockets, etc. The loser exits + # cleanly before touching any external service. + import atexit + from gateway.status import write_pid_file, remove_pid_file, get_running_pid + _current_pid = get_running_pid() + if _current_pid is not None and _current_pid != os.getpid(): + logger.error( + "Another gateway instance (PID %d) started during our startup. " + "Exiting to avoid double-running.", _current_pid + ) + return False + try: + write_pid_file() + except FileExistsError: + logger.error( + "PID file race lost to another gateway instance. Exiting." + ) + return False + atexit.register(remove_pid_file) + # Start the gateway success = await runner.start() if not success: @@ -10960,29 +10984,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = logger.error("Gateway exiting cleanly: %s", runner.exit_reason) return True - # Write PID file so CLI can detect gateway is running - import atexit - from gateway.status import write_pid_file, remove_pid_file, get_running_pid - # Defensive re-check: another --replace racer may have started - # while we were initializing. If so, yield and exit. - _current_pid = get_running_pid() - if _current_pid is not None and _current_pid != os.getpid(): - logger.error( - "Another gateway instance (PID %d) started during our startup. " - "Exiting to avoid double-running.", _current_pid - ) - await runner.stop() - return False - try: - write_pid_file() - except FileExistsError: - logger.error( - "PID file race lost to another gateway instance. Exiting." - ) - await runner.stop() - return False - atexit.register(remove_pid_file) - # Start background cron ticker so scheduled jobs fire automatically. # Pass the event loop so cron delivery can use live adapters (E2EE support). cron_stop = threading.Event() diff --git a/tests/gateway/test_runner_startup_failures.py b/tests/gateway/test_runner_startup_failures.py index 96d5d4627b..83ffc0d4d0 100644 --- a/tests/gateway/test_runner_startup_failures.py +++ b/tests/gateway/test_runner_startup_failures.py @@ -184,8 +184,15 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p async def stop(self): return None - monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42) - monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None) + # get_running_pid returns 42 before we kill the old gateway, then None + # after remove_pid_file() clears the record (reflects real behavior). + _pid_state = {"alive": True} + def _mock_get_running_pid(): + return 42 if _pid_state["alive"] else None + def _mock_remove_pid_file(): + _pid_state["alive"] = False + monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid) + monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file) monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0) monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force))) monkeypatch.setattr("gateway.run.os.getpid", lambda: 100) @@ -253,8 +260,13 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm( async def stop(self): return None - monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42) - monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None) + _pid_state = {"alive": True} + def _mock_get_running_pid(): + return 42 if _pid_state["alive"] else None + def _mock_remove_pid_file(): + _pid_state["alive"] = False + monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid) + monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file) monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0) monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker) monkeypatch.setattr("gateway.status.terminate_pid", record_terminate) diff --git a/tests/gateway/test_status.py b/tests/gateway/test_status.py index 04a0856f60..6c371cfbea 100644 --- a/tests/gateway/test_status.py +++ b/tests/gateway/test_status.py @@ -19,6 +19,30 @@ class TestGatewayPidState: assert isinstance(payload["argv"], list) assert payload["argv"] + def test_write_pid_file_is_atomic_against_concurrent_writers(self, tmp_path, monkeypatch): + """Regression: two concurrent --replace invocations must not both win. + + Without O_CREAT|O_EXCL, two processes racing through start_gateway()'s + termination-wait would both write to gateway.pid, silently overwriting + each other and leaving multiple gateway instances alive (#11718). + """ + import pytest + + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + # First write wins. + status.write_pid_file() + assert (tmp_path / "gateway.pid").exists() + + # Second write (simulating a racing --replace that missed the earlier + # guards) must raise FileExistsError rather than clobber the record. + with pytest.raises(FileExistsError): + status.write_pid_file() + + # Original record is preserved. + payload = json.loads((tmp_path / "gateway.pid").read_text()) + assert payload["pid"] == os.getpid() + def test_get_running_pid_rejects_live_non_gateway_pid(self, tmp_path, monkeypatch): monkeypatch.setenv("HERMES_HOME", str(tmp_path)) pid_path = tmp_path / "gateway.pid" From 1010e5fa3cf4299486441872ec49d0baa5c5afbc Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Tue, 21 Apr 2026 12:35:10 +0530 Subject: [PATCH 10/63] refactor: remove redundant local imports already available at module level Sweep ~74 redundant local imports across 21 files where the same module was already imported at the top level. Also includes type fixes and lint cleanups on the same branch. --- acp_adapter/server.py | 4 +- agent/context_compressor.py | 2 +- agent/copilot_acp_client.py | 2 + agent/gemini_cloudcode_adapter.py | 3 +- agent/gemini_native_adapter.py | 3 +- cli.py | 40 +++---- gateway/config.py | 4 +- gateway/platforms/api_server.py | 79 ++++++------- gateway/platforms/base.py | 16 +-- gateway/platforms/discord.py | 4 +- gateway/platforms/mattermost.py | 1 - gateway/platforms/qqbot/adapter.py | 11 +- gateway/platforms/slack.py | 8 -- gateway/platforms/telegram.py | 6 +- gateway/platforms/wecom.py | 15 ++- gateway/run.py | 61 +++------- hermes_cli/config.py | 3 - hermes_cli/gateway.py | 4 - hermes_cli/main.py | 16 +-- hermes_cli/models.py | 3 - hermes_cli/setup.py | 3 - hermes_cli/web_server.py | 4 +- run_agent.py | 19 +-- tests/gateway/test_api_server_jobs.py | 162 +++++++++++++------------- tools/browser_tool.py | 4 - tools/checkpoint_manager.py | 1 - tools/mcp_tool.py | 1 - tools/process_registry.py | 17 ++- tools/skills_tool.py | 1 - tools/terminal_tool.py | 3 +- uv.lock | 105 +++++++++++++++-- 31 files changed, 289 insertions(+), 316 deletions(-) diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 4685a68a8c..119a08685a 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -613,8 +613,8 @@ class HermesACPAgent(acp.Agent): await self._conn.session_update( session_id=session_id, update=AvailableCommandsUpdate( - sessionUpdate="available_commands_update", - availableCommands=self._available_commands(), + session_update="available_commands_update", + available_commands=self._available_commands(), ), ) except Exception: diff --git a/agent/context_compressor.py b/agent/context_compressor.py index f56515dabe..254ac0ac5e 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -807,7 +807,7 @@ The user has requested that this compaction PRIORITISE preserving all informatio ) self.summary_model = "" # empty = use main model self._summary_failure_cooldown_until = 0.0 # no cooldown - return self._generate_summary(messages, summary_budget) # retry immediately + return self._generate_summary(turns_to_summarize) # retry immediately # Transient errors (timeout, rate limit, network) — shorter cooldown _transient_cooldown = 60 diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 031c58d705..7a0d3dfd65 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -386,6 +386,8 @@ class CopilotACPClient: stderr_tail: deque[str] = deque(maxlen=40) def _stdout_reader() -> None: + if proc.stdout is None: + return for line in proc.stdout: try: inbox.put(json.loads(line)) diff --git a/agent/gemini_cloudcode_adapter.py b/agent/gemini_cloudcode_adapter.py index b5a8fb9272..24866c3a53 100644 --- a/agent/gemini_cloudcode_adapter.py +++ b/agent/gemini_cloudcode_adapter.py @@ -799,7 +799,8 @@ def _gemini_http_error(response: httpx.Response) -> CodeAssistError: err_obj = {} err_status = str(err_obj.get("status") or "").strip() err_message = str(err_obj.get("message") or "").strip() - err_details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else [] + _raw_details = err_obj.get("details") + err_details_list = _raw_details if isinstance(_raw_details, list) else [] # Extract google.rpc.ErrorInfo reason + metadata. There may be more # than one ErrorInfo (rare), so we pick the first one with a reason. diff --git a/agent/gemini_native_adapter.py b/agent/gemini_native_adapter.py index 8418cec987..406e4a19b7 100644 --- a/agent/gemini_native_adapter.py +++ b/agent/gemini_native_adapter.py @@ -613,7 +613,8 @@ def gemini_http_error(response: httpx.Response) -> GeminiAPIError: err_obj = {} err_status = str(err_obj.get("status") or "").strip() err_message = str(err_obj.get("message") or "").strip() - details_list = err_obj.get("details") if isinstance(err_obj.get("details"), list) else [] + _raw_details = err_obj.get("details") + details_list = _raw_details if isinstance(_raw_details, list) else [] reason = "" retry_after: Optional[float] = None diff --git a/cli.py b/cli.py index 18aeb27161..a045550dd7 100644 --- a/cli.py +++ b/cli.py @@ -529,7 +529,6 @@ def load_cli_config() -> Dict[str, Any]: if _file_has_terminal_config or env_var not in os.environ: val = terminal_config[config_key] if isinstance(val, list): - import json os.environ[env_var] = json.dumps(val) else: os.environ[env_var] = str(val) @@ -1144,8 +1143,6 @@ def _rich_text_from_ansi(text: str) -> _RichText: def _strip_markdown_syntax(text: str) -> str: """Best-effort markdown marker removal for plain-text display.""" - import re - plain = _rich_text_from_ansi(text or "").plain plain = re.sub(r"^\s{0,3}(?:[-*_]\s*){3,}$", "", plain, flags=re.MULTILINE) plain = re.sub(r"^\s{0,3}#{1,6}\s+", "", plain, flags=re.MULTILINE) @@ -2002,8 +1999,7 @@ class HermesCLI: def _invalidate(self, min_interval: float = 0.25) -> None: """Throttled UI repaint — prevents terminal blinking on slow/SSH connections.""" - import time as _time - now = _time.monotonic() + now = time.monotonic() if hasattr(self, "_app") and self._app and (now - self._last_invalidate) >= min_interval: self._last_invalidate = now self._app.invalidate() @@ -2221,8 +2217,7 @@ class HermesCLI: return "" t0 = getattr(self, "_tool_start_time", 0) or 0 if t0 > 0: - import time as _time - elapsed = _time.monotonic() - t0 + elapsed = time.monotonic() - t0 if elapsed >= 60: _m, _s = int(elapsed // 60), int(elapsed % 60) elapsed_str = f"{_m}m {_s}s" @@ -2477,9 +2472,6 @@ class HermesCLI: def _emit_reasoning_preview(self, reasoning_text: str) -> None: """Render a buffered reasoning preview as a single [thinking] block.""" - import re - import textwrap - preview_text = reasoning_text.strip() if not preview_text: return @@ -2598,9 +2590,7 @@ class HermesCLI: """Expand [Pasted text #N -> file] placeholders into file contents.""" if not isinstance(text, str) or "[Pasted text #" not in text: return text or "" - import re as _re - - paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') + paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') def _expand_ref(match): path = Path(match.group(1)) @@ -2923,9 +2913,7 @@ class HermesCLI: def _command_spinner_frame(self) -> str: """Return the current spinner frame for slow slash commands.""" - import time as _time - - frame_idx = int(_time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES) + frame_idx = int(time.monotonic() * 10) % len(_COMMAND_SPINNER_FRAMES) return _COMMAND_SPINNER_FRAMES[frame_idx] @contextmanager @@ -3936,7 +3924,6 @@ class HermesCLI: image later with ``vision_analyze`` if needed. """ import asyncio as _asyncio - import json as _json from tools.vision_tools import vision_analyze_tool analysis_prompt = ( @@ -3956,7 +3943,7 @@ class HermesCLI: result_json = _asyncio.run( vision_analyze_tool(image_url=str(img_path), user_prompt=analysis_prompt) ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "") enriched_parts.append( @@ -6282,8 +6269,7 @@ class HermesCLI: # with the output (fixes #2718). if self._app: self._app.invalidate() - import time as _tmod - _tmod.sleep(0.05) # brief pause for refresh + time.sleep(0.05) # brief pause for refresh print() ChatConsole().print(f"[{_accent_hex()}]{'─' * 40}[/]") _cprint(f" ✅ Background task #{task_num} complete") @@ -6323,8 +6309,7 @@ class HermesCLI: # Same TUI refresh pattern as success path (#2718) if self._app: self._app.invalidate() - import time as _tmod - _tmod.sleep(0.05) + time.sleep(0.05) print() _cprint(f" ❌ Background task #{task_num} failed: {e}") finally: @@ -6544,7 +6529,6 @@ class HermesCLI: _launched = self._try_launch_chrome_debug(_port, _plat.system()) if _launched: # Wait for the port to come up - import time as _time for _wait in range(10): try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -6554,7 +6538,7 @@ class HermesCLI: _already_open = True break except (OSError, socket.timeout): - _time.sleep(0.5) + time.sleep(0.5) if _already_open: print(f" ✓ Chrome launched and listening on port {_port}") else: @@ -7084,7 +7068,6 @@ class HermesCLI: known state. When a change is detected, triggers _reload_mcp() and informs the user so they know the tool list has been refreshed. """ - import time import yaml as _yaml CONFIG_WATCH_INTERVAL = 5.0 # seconds between config.yaml stat() calls @@ -7943,7 +7926,9 @@ class HermesCLI: return selected = state.get("selected", 0) - choices = state.get("choices") or [] + choices = state.get("choices") + if not isinstance(choices, list): + choices = [] if not (0 <= selected < len(choices)): return @@ -10025,7 +10010,8 @@ class HermesCLI: if stage == "provider": title = "⚙ Model Picker — Select Provider" choices = [] - for p in state.get("providers") or []: + _providers = state.get("providers") + for p in _providers if isinstance(_providers, list) else []: count = p.get("total_models", len(p.get("models", []))) label = f"{p['name']} ({count} model{'s' if count != 1 else ''})" if p.get("is_current"): diff --git a/gateway/config.py b/gateway/config.py index 7e95a87a83..d1d84da106 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -670,8 +670,7 @@ def load_gateway_config() -> GatewayConfig: if "require_mention" in telegram_cfg and not os.getenv("TELEGRAM_REQUIRE_MENTION"): os.environ["TELEGRAM_REQUIRE_MENTION"] = str(telegram_cfg["require_mention"]).lower() if "mention_patterns" in telegram_cfg and not os.getenv("TELEGRAM_MENTION_PATTERNS"): - import json as _json - os.environ["TELEGRAM_MENTION_PATTERNS"] = _json.dumps(telegram_cfg["mention_patterns"]) + os.environ["TELEGRAM_MENTION_PATTERNS"] = json.dumps(telegram_cfg["mention_patterns"]) frc = telegram_cfg.get("free_response_chats") if frc is not None and not os.getenv("TELEGRAM_FREE_RESPONSE_CHATS"): if isinstance(frc, list): @@ -1259,7 +1258,6 @@ def _apply_env_overrides(config: GatewayConfig) -> None: if legacy_home: qq_home = legacy_home qq_home_name_env = "QQ_HOME_CHANNEL_NAME" - import logging logging.getLogger(__name__).warning( "QQ_HOME_CHANNEL is deprecated; rename to QQBOT_HOME_CHANNEL " "in your .env for consistency with the platform key." diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index 8bbf16e17e..a6b52ff323 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -323,7 +323,6 @@ class ResponseStore: ).fetchone() if row is None: return None - import time self._conn.execute( "UPDATE responses SET accessed_at = ? WHERE response_id = ?", (time.time(), response_id), @@ -333,7 +332,6 @@ class ResponseStore: def put(self, response_id: str, data: Dict[str, Any]) -> None: """Store a response, evicting the oldest if at capacity.""" - import time self._conn.execute( "INSERT OR REPLACE INTO responses (response_id, data, accessed_at) VALUES (?, ?, ?)", (response_id, json.dumps(data, default=str), time.time()), @@ -474,8 +472,7 @@ class _IdempotencyCache: self._max = max_items def _purge(self): - import time as _t - now = _t.time() + now = time.time() expired = [k for k, v in self._store.items() if now - v["ts"] > self._ttl] for k in expired: self._store.pop(k, None) @@ -537,6 +534,30 @@ def _derive_chat_session_id( return f"api-{digest}" +_CRON_AVAILABLE = False +try: + from cron.jobs import ( + list_jobs as _cron_list, + get_job as _cron_get, + create_job as _cron_create, + update_job as _cron_update, + remove_job as _cron_remove, + pause_job as _cron_pause, + resume_job as _cron_resume, + trigger_job as _cron_trigger, + ) + _CRON_AVAILABLE = True +except ImportError: + _cron_list = None + _cron_get = None + _cron_create = None + _cron_update = None + _cron_remove = None + _cron_pause = None + _cron_resume = None + _cron_trigger = None + + class APIServerAdapter(BasePlatformAdapter): """ OpenAI-compatible HTTP API server adapter. @@ -1866,44 +1887,16 @@ class APIServerAdapter(BasePlatformAdapter): # Cron jobs API # ------------------------------------------------------------------ - # Check cron module availability once (not per-request) - _CRON_AVAILABLE = False - try: - from cron.jobs import ( - list_jobs as _cron_list, - get_job as _cron_get, - create_job as _cron_create, - update_job as _cron_update, - remove_job as _cron_remove, - pause_job as _cron_pause, - resume_job as _cron_resume, - trigger_job as _cron_trigger, - ) - # Wrap as staticmethod to prevent descriptor binding — these are plain - # module functions, not instance methods. Without this, self._cron_*() - # injects ``self`` as the first positional argument and every call - # raises TypeError. - _cron_list = staticmethod(_cron_list) - _cron_get = staticmethod(_cron_get) - _cron_create = staticmethod(_cron_create) - _cron_update = staticmethod(_cron_update) - _cron_remove = staticmethod(_cron_remove) - _cron_pause = staticmethod(_cron_pause) - _cron_resume = staticmethod(_cron_resume) - _cron_trigger = staticmethod(_cron_trigger) - _CRON_AVAILABLE = True - except ImportError: - pass - _JOB_ID_RE = __import__("re").compile(r"[a-f0-9]{12}") # Allowed fields for update — prevents clients injecting arbitrary keys _UPDATE_ALLOWED_FIELDS = {"name", "schedule", "prompt", "deliver", "skills", "skill", "repeat", "enabled"} _MAX_NAME_LENGTH = 200 _MAX_PROMPT_LENGTH = 5000 - def _check_jobs_available(self) -> Optional["web.Response"]: + @staticmethod + def _check_jobs_available() -> Optional["web.Response"]: """Return error response if cron module isn't available.""" - if not self._CRON_AVAILABLE: + if not _CRON_AVAILABLE: return web.json_response( {"error": "Cron module not available"}, status=501, ) @@ -1928,7 +1921,7 @@ class APIServerAdapter(BasePlatformAdapter): return cron_err try: include_disabled = request.query.get("include_disabled", "").lower() in ("true", "1") - jobs = self._cron_list(include_disabled=include_disabled) + jobs = _cron_list(include_disabled=include_disabled) return web.json_response({"jobs": jobs}) except Exception as e: return web.json_response({"error": str(e)}, status=500) @@ -1976,7 +1969,7 @@ class APIServerAdapter(BasePlatformAdapter): if repeat is not None: kwargs["repeat"] = repeat - job = self._cron_create(**kwargs) + job = _cron_create(**kwargs) return web.json_response({"job": job}) except Exception as e: return web.json_response({"error": str(e)}, status=500) @@ -1993,7 +1986,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_get(job_id) + job = _cron_get(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -2026,7 +2019,7 @@ class APIServerAdapter(BasePlatformAdapter): return web.json_response( {"error": f"Prompt must be ≤ {self._MAX_PROMPT_LENGTH} characters"}, status=400, ) - job = self._cron_update(job_id, sanitized) + job = _cron_update(job_id, sanitized) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -2045,7 +2038,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - success = self._cron_remove(job_id) + success = _cron_remove(job_id) if not success: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"ok": True}) @@ -2064,7 +2057,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_pause(job_id) + job = _cron_pause(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -2083,7 +2076,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_resume(job_id) + job = _cron_resume(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) @@ -2102,7 +2095,7 @@ class APIServerAdapter(BasePlatformAdapter): if id_err: return id_err try: - job = self._cron_trigger(job_id) + job = _cron_trigger(job_id) if not job: return web.json_response({"error": "Job not found"}, status=404) return web.json_response({"job": job}) diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index bda137cf3b..86a867c107 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -391,12 +391,9 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}") - import asyncio import httpx - import logging as _logging - _log = _logging.getLogger(__name__) + _log = logging.getLogger(__name__) - last_exc = None async with httpx.AsyncClient( timeout=30.0, follow_redirects=True, @@ -414,7 +411,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> response.raise_for_status() return cache_image_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < retries: @@ -430,7 +426,6 @@ async def cache_image_from_url(url: str, ext: str = ".jpg", retries: int = 2) -> await asyncio.sleep(wait) continue raise - raise last_exc def cleanup_image_cache(max_age_hours: int = 24) -> int: @@ -510,12 +505,9 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> if not is_safe_url(url): raise ValueError(f"Blocked unsafe URL (SSRF protection): {safe_url_for_log(url)}") - import asyncio import httpx - import logging as _logging - _log = _logging.getLogger(__name__) + _log = logging.getLogger(__name__) - last_exc = None async with httpx.AsyncClient( timeout=30.0, follow_redirects=True, @@ -533,7 +525,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> response.raise_for_status() return cache_audio_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < retries: @@ -549,7 +540,6 @@ async def cache_audio_from_url(url: str, ext: str = ".ogg", retries: int = 2) -> await asyncio.sleep(wait) continue raise - raise last_exc # --------------------------------------------------------------------------- @@ -1787,8 +1777,6 @@ class BasePlatformAdapter(ABC): HERMES_HUMAN_DELAY_MIN_MS: minimum delay in ms (default 800, custom mode) HERMES_HUMAN_DELAY_MAX_MS: maximum delay in ms (default 2500, custom mode) """ - import random - mode = os.getenv("HERMES_HUMAN_DELAY_MODE", "off").lower() if mode == "off": return 0.0 diff --git a/gateway/platforms/discord.py b/gateway/platforms/discord.py index 2b45b2b580..d43e18d73d 100644 --- a/gateway/platforms/discord.py +++ b/gateway/platforms/discord.py @@ -541,7 +541,6 @@ class DiscordAdapter(BasePlatformAdapter): # ctypes.util.find_library fails on macOS with Homebrew-installed libs, # so fall back to known Homebrew paths if needed. if not opus_path: - import sys _homebrew_paths = ( "/opt/homebrew/lib/libopus.dylib", # Apple Silicon "/usr/local/lib/libopus.dylib", # Intel Mac @@ -1422,8 +1421,7 @@ class DiscordAdapter(BasePlatformAdapter): speaking_user_ids: set = set() receiver = self._voice_receivers.get(guild_id) if receiver: - import time as _time - now = _time.monotonic() + now = time.monotonic() with receiver._lock: for ssrc, last_t in receiver._last_packet_time.items(): # Consider "speaking" if audio received within last 2 seconds diff --git a/gateway/platforms/mattermost.py b/gateway/platforms/mattermost.py index 10539bf646..0e6c9631d7 100644 --- a/gateway/platforms/mattermost.py +++ b/gateway/platforms/mattermost.py @@ -410,7 +410,6 @@ class MattermostAdapter(BasePlatformAdapter): logger.warning("Mattermost: blocked unsafe URL (SSRF protection)") return await self.send(chat_id, f"{caption or ''}\n{url}".strip(), reply_to) - import asyncio import aiohttp last_exc = None diff --git a/gateway/platforms/qqbot/adapter.py b/gateway/platforms/qqbot/adapter.py index ced7442711..df3987f2eb 100644 --- a/gateway/platforms/qqbot/adapter.py +++ b/gateway/platforms/qqbot/adapter.py @@ -1086,11 +1086,8 @@ class QQAdapter(BasePlatformAdapter): return MessageType.VIDEO if "image" in first_type or "photo" in first_type: return MessageType.PHOTO - # Unknown content type with an attachment — don't assume PHOTO - # to prevent non-image files from being sent to vision analysis. logger.debug( - "[%s] Unknown media content_type '%s', defaulting to TEXT", - self._log_tag, + "Unknown media content_type '%s', defaulting to TEXT", first_type, ) return MessageType.TEXT @@ -1826,14 +1823,12 @@ class QQAdapter(BasePlatformAdapter): body["file_name"] = file_name # Retry transient upload failures - last_exc = None for attempt in range(3): try: return await self._api_request( "POST", path, body, timeout=FILE_UPLOAD_TIMEOUT ) except RuntimeError as exc: - last_exc = exc err_msg = str(exc) if any( kw in err_msg @@ -1842,8 +1837,8 @@ class QQAdapter(BasePlatformAdapter): raise if attempt < 2: await asyncio.sleep(1.5 * (attempt + 1)) - - raise last_exc # type: ignore[misc] + else: + raise # Maximum time (seconds) to wait for reconnection before giving up on send. _RECONNECT_WAIT_SECONDS = 15.0 diff --git a/gateway/platforms/slack.py b/gateway/platforms/slack.py index d3d2187948..6a08f04666 100644 --- a/gateway/platforms/slack.py +++ b/gateway/platforms/slack.py @@ -1600,11 +1600,9 @@ class SlackAdapter(BasePlatformAdapter): async def _download_slack_file(self, url: str, ext: str, audio: bool = False, team_id: str = "") -> str: """Download a Slack file using the bot token for auth, with retry.""" - import asyncio import httpx bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token - last_exc = None async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: for attempt in range(3): @@ -1634,7 +1632,6 @@ class SlackAdapter(BasePlatformAdapter): from gateway.platforms.base import cache_image_from_bytes return cache_image_from_bytes(response.content, ext) except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < 2: @@ -1643,15 +1640,12 @@ class SlackAdapter(BasePlatformAdapter): await asyncio.sleep(1.5 * (attempt + 1)) continue raise - raise last_exc async def _download_slack_file_bytes(self, url: str, team_id: str = "") -> bytes: """Download a Slack file and return raw bytes, with retry.""" - import asyncio import httpx bot_token = self._team_clients[team_id].token if team_id and team_id in self._team_clients else self.config.token - last_exc = None async with httpx.AsyncClient(timeout=30.0, follow_redirects=True) as client: for attempt in range(3): @@ -1663,7 +1657,6 @@ class SlackAdapter(BasePlatformAdapter): response.raise_for_status() return response.content except (httpx.TimeoutException, httpx.HTTPStatusError) as exc: - last_exc = exc if isinstance(exc, httpx.HTTPStatusError) and exc.response.status_code < 429: raise if attempt < 2: @@ -1672,7 +1665,6 @@ class SlackAdapter(BasePlatformAdapter): await asyncio.sleep(1.5 * (attempt + 1)) continue raise - raise last_exc # ── Channel mention gating ───────────────────────────────────────────── diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 67be808be4..cfad233e68 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -1713,7 +1713,6 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error="Not connected") try: - import os if not os.path.exists(audio_path): return SendResult(success=False, error=self._missing_media_path_error("Audio", audio_path)) @@ -1762,7 +1761,6 @@ class TelegramAdapter(BasePlatformAdapter): return SendResult(success=False, error="Not connected") try: - import os if not os.path.exists(image_path): return SendResult(success=False, error=self._missing_media_path_error("Image", image_path)) @@ -2823,13 +2821,11 @@ class TelegramAdapter(BasePlatformAdapter): logger.info("[Telegram] Analyzing sticker at %s", cached_path) from tools.vision_tools import vision_analyze_tool - import json as _json - result_json = await vision_analyze_tool( image_url=cached_path, user_prompt=STICKER_VISION_PROMPT, ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "a sticker") diff --git a/gateway/platforms/wecom.py b/gateway/platforms/wecom.py index 9e5dd04e0d..8cfc5c2c65 100644 --- a/gateway/platforms/wecom.py +++ b/gateway/platforms/wecom.py @@ -624,13 +624,16 @@ class WeComAdapter(BasePlatformAdapter): msgtype = str(body.get("msgtype") or "").lower() if msgtype == "mixed": - mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {} - items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else [] + _raw_mixed = body.get("mixed") + mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {} + _raw_items = mixed.get("msg_item") + items = _raw_items if isinstance(_raw_items, list) else [] for item in items: if not isinstance(item, dict): continue if str(item.get("msgtype") or "").lower() == "text": - text_block = item.get("text") if isinstance(item.get("text"), dict) else {} + _raw_text = item.get("text") + text_block = _raw_text if isinstance(_raw_text, dict) else {} content = str(text_block.get("content") or "").strip() if content: text_parts.append(content) @@ -672,8 +675,10 @@ class WeComAdapter(BasePlatformAdapter): msgtype = str(body.get("msgtype") or "").lower() if msgtype == "mixed": - mixed = body.get("mixed") if isinstance(body.get("mixed"), dict) else {} - items = mixed.get("msg_item") if isinstance(mixed.get("msg_item"), list) else [] + _raw_mixed = body.get("mixed") + mixed = _raw_mixed if isinstance(_raw_mixed, dict) else {} + _raw_items = mixed.get("msg_item") + items = _raw_items if isinstance(_raw_items, list) else [] for item in items: if not isinstance(item, dict): continue diff --git a/gateway/run.py b/gateway/run.py index bd034854d7..647027003c 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1266,7 +1266,6 @@ class GatewayRunner: the prefill_messages_file key in ~/.hermes/config.yaml. Relative paths are resolved from ~/.hermes/. """ - import json as _json file_path = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") if not file_path: try: @@ -1288,7 +1287,7 @@ class GatewayRunner: return [] try: with open(path, "r", encoding="utf-8") as f: - data = _json.load(f) + data = json.load(f) if not isinstance(data, list): logger.warning("Prefill messages file must contain a JSON array: %s", path) return [] @@ -3675,9 +3674,8 @@ class GatewayRunner: plugin_handler = get_plugin_command_handler(command.replace("_", "-")) if plugin_handler: user_args = event.get_command_args().strip() - import asyncio as _aio result = plugin_handler(user_args) - if _aio.iscoroutine(result): + if asyncio.iscoroutine(result): result = await result return str(result) if result else None except Exception as e: @@ -3871,13 +3869,10 @@ class GatewayRunner: if not mtype.startswith(("application/", "text/")): continue - import os as _os - import re as _re - - basename = _os.path.basename(path) + basename = os.path.basename(path) parts = basename.split("_", 2) display_name = parts[2] if len(parts) >= 3 else basename - display_name = _re.sub(r'[^\w.\- ]', '_', display_name) + display_name = re.sub(r'[^\w.\- ]', '_', display_name) if mtype.startswith("text/"): context_note = ( @@ -5175,7 +5170,6 @@ class GatewayRunner: # Save the requester's routing info so the new gateway process can # notify them once it comes back online. try: - import json as _json notify_data = { "platform": event.source.platform.value if event.source.platform else None, "chat_id": event.source.chat_id, @@ -5183,7 +5177,7 @@ class GatewayRunner: if event.source.thread_id: notify_data["thread_id"] = event.source.thread_id (_hermes_home / ".restart_notify.json").write_text( - _json.dumps(notify_data) + json.dumps(notify_data) ) except Exception as e: logger.debug("Failed to write restart notify file: %s", e) @@ -5194,16 +5188,14 @@ class GatewayRunner: # marker persists so the new gateway can still detect a delayed # /restart redelivery from Telegram. Overwritten on every /restart. try: - import json as _json - import time as _time dedup_data = { "platform": event.source.platform.value if event.source.platform else None, - "requested_at": _time.time(), + "requested_at": time.time(), } if event.platform_update_id is not None: dedup_data["update_id"] = event.platform_update_id (_hermes_home / ".restart_last_processed.json").write_text( - _json.dumps(dedup_data) + json.dumps(dedup_data) ) except Exception as e: logger.debug("Failed to write restart dedup marker: %s", e) @@ -5251,12 +5243,10 @@ class GatewayRunner: return False try: - import json as _json - import time as _time marker_path = _hermes_home / ".restart_last_processed.json" if not marker_path.exists(): return False - data = _json.loads(marker_path.read_text()) + data = json.loads(marker_path.read_text()) except Exception: return False @@ -5270,7 +5260,7 @@ class GatewayRunner: # swallow a fresh /restart from the user. requested_at = data.get("requested_at") if isinstance(requested_at, (int, float)): - if _time.time() - requested_at > 300: + if time.time() - requested_at > 300: return False return event.platform_update_id <= recorded_uid @@ -7352,13 +7342,10 @@ class GatewayRunner: async def _handle_insights_command(self, event: MessageEvent) -> str: """Handle /insights command -- show usage insights and analytics.""" - import asyncio as _asyncio - args = event.get_command_args().strip() # Normalize Unicode dashes (Telegram/iOS auto-converts -- to em/en dash) - import re as _re - args = _re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args) + args = re.sub(r'[\u2012\u2013\u2014\u2015](days|source)', r'--\1', args) days = 30 source = None @@ -7387,7 +7374,7 @@ class GatewayRunner: from hermes_state import SessionDB from agent.insights import InsightsEngine - loop = _asyncio.get_running_loop() + loop = asyncio.get_running_loop() def _run_insights(): db = SessionDB() @@ -7745,9 +7732,6 @@ class GatewayRunner: the messenger. The user's next message is intercepted by ``_handle_message`` and written to ``.update_response``. """ - import json - import re as _re - pending_path = _hermes_home / ".update_pending.json" claimed_path = _hermes_home / ".update_pending.claimed.json" output_path = _hermes_home / ".update_output.txt" @@ -7792,7 +7776,7 @@ class GatewayRunner: return def _strip_ansi(text: str) -> str: - return _re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text) + return re.sub(r'\x1b\[[0-9;]*[A-Za-z]', '', text) bytes_sent = 0 last_stream_time = loop.time() @@ -7940,9 +7924,6 @@ class GatewayRunner: cannot resolve the adapter (e.g. after a gateway restart where the platform hasn't reconnected yet). """ - import json - import re as _re - pending_path = _hermes_home / ".update_pending.json" claimed_path = _hermes_home / ".update_pending.claimed.json" output_path = _hermes_home / ".update_output.txt" @@ -7988,7 +7969,7 @@ class GatewayRunner: if adapter and chat_id: # Strip ANSI escape codes for clean display - output = _re.sub(r'\x1b\[[0-9;]*m', '', output).strip() + output = re.sub(r'\x1b\[[0-9;]*m', '', output).strip() if output: if len(output) > 3500: output = "…" + output[-3500:] @@ -8021,14 +8002,12 @@ class GatewayRunner: async def _send_restart_notification(self) -> None: """Notify the chat that initiated /restart that the gateway is back.""" - import json as _json - notify_path = _hermes_home / ".restart_notify.json" if not notify_path.exists(): return try: - data = _json.loads(notify_path.read_text()) + data = json.loads(notify_path.read_text()) platform_str = data.get("platform") chat_id = data.get("chat_id") thread_id = data.get("thread_id") @@ -8114,7 +8093,6 @@ class GatewayRunner: The enriched message string with vision descriptions prepended. """ from tools.vision_tools import vision_analyze_tool - import json as _json analysis_prompt = ( "Describe everything visible in this image in thorough detail. " @@ -8130,7 +8108,7 @@ class GatewayRunner: image_url=path, user_prompt=analysis_prompt, ) - result = _json.loads(result_json) + result = json.loads(result_json) if result.get("success"): description = result.get("analysis", "") enriched_parts.append( @@ -8189,7 +8167,6 @@ class GatewayRunner: return disabled_note from tools.transcription_tools import transcribe_audio - import asyncio enriched_parts = [] for path in audio_paths: @@ -9236,8 +9213,7 @@ class GatewayRunner: if args: from agent.display import get_tool_preview_max_len _pl = get_tool_preview_max_len() - import json as _json - args_str = _json.dumps(args, ensure_ascii=False, default=str) + args_str = json.dumps(args, ensure_ascii=False, default=str) # When tool_preview_length is 0 (default), don't truncate # in verbose mode — the user explicitly asked for full # detail. Platform message-length limits handle the rest. @@ -10752,7 +10728,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = # The PID file is scoped to HERMES_HOME, so future multi-profile # setups (each profile using a distinct HERMES_HOME) will naturally # allow concurrent instances without tripping this guard. - import time as _time from gateway.status import get_running_pid, remove_pid_file, terminate_pid existing_pid = get_running_pid() if existing_pid is not None and existing_pid != os.getpid(): @@ -10792,7 +10767,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = for _ in range(20): try: os.kill(existing_pid, 0) - _time.sleep(0.5) + time.sleep(0.5) except (ProcessLookupError, PermissionError): break # Process is gone else: @@ -10803,7 +10778,7 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool = ) try: terminate_pid(existing_pid, force=True) - _time.sleep(0.5) + time.sleep(0.5) except (ProcessLookupError, PermissionError, OSError): pass remove_pid_file() diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 4ed7eaf8e4..255721482a 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -2249,7 +2249,6 @@ def print_config_warnings(config: Optional[Dict[str, Any]] = None) -> None: if not issues: return - import sys lines = ["\033[33m⚠ Config issues detected in config.yaml:\033[0m"] for ci in issues: marker = "\033[31m✗\033[0m" if ci.severity == "error" else "\033[33m⚠\033[0m" @@ -2264,7 +2263,6 @@ def warn_deprecated_cwd_env_vars(config: Optional[Dict[str, Any]] = None) -> Non These env vars are deprecated — the canonical setting is terminal.cwd in config.yaml. Prints a migration hint to stderr. """ - import os, sys messaging_cwd = os.environ.get("MESSAGING_CWD") terminal_cwd_env = os.environ.get("TERMINAL_CWD") @@ -3273,7 +3271,6 @@ def _check_non_ascii_credential(key: str, value: str) -> str: bad_chars.append(f" position {i}: {ch!r} (U+{ord(ch):04X})") sanitized = value.encode("ascii", errors="ignore").decode("ascii") - import sys print( f"\n Warning: {key} contains non-ASCII characters that will break API requests.\n" f" This usually happens when copy-pasting from a PDF, rich-text editor,\n" diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index bc809cadf9..f7c9cfff8d 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -994,8 +994,6 @@ def get_systemd_linger_status() -> tuple[bool | None, str]: if not is_linux(): return None, "not supported on this platform" - import shutil - if not shutil.which("loginctl"): return None, "loginctl not found" @@ -1347,7 +1345,6 @@ def _ensure_linger_enabled() -> None: return import getpass - import shutil username = getpass.getuser() linger_file = Path(f"/var/lib/systemd/linger/{username}") @@ -1656,7 +1653,6 @@ def get_launchd_label() -> str: def _launchd_domain() -> str: - import os return f"gui/{os.getuid()}" diff --git a/hermes_cli/main.py b/hermes_cli/main.py index adac54fb4c..9fa8f0e52b 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -618,7 +618,6 @@ def _exec_in_container(container_info: dict, cli_args: list): container_info: dict with backend, container_name, exec_user, hermes_bin cli_args: the original CLI arguments (everything after 'hermes') """ - import shutil backend = container_info["backend"] container_name = container_info["container_name"] @@ -1181,8 +1180,6 @@ def cmd_gateway(args): def cmd_whatsapp(args): """Set up WhatsApp: choose mode, configure, install bridge, pair via QR.""" _require_tty("whatsapp") - import subprocess - from pathlib import Path from hermes_cli.config import get_env_value, save_env_value print() @@ -1425,8 +1422,6 @@ def select_provider_and_model(args=None): # Read effective provider the same way the CLI does at startup: # config.yaml model.provider > env var > auto-detect - import os - config_provider = None model_cfg = config.get("model") if isinstance(model_cfg, dict): @@ -2132,7 +2127,6 @@ def _model_flow_nous(config, current_model="", args=None): save_env_value, ) from hermes_cli.nous_subscription import prompt_enable_tool_gateway - import argparse state = get_provider_auth_state("nous") if not state or not state.get("access_token"): @@ -2300,7 +2294,6 @@ def _model_flow_openai_codex(config, current_model=""): DEFAULT_CODEX_BASE_URL, ) from hermes_cli.codex_models import get_codex_model_ids - import argparse status = get_codex_auth_status() if not status.get("logged_in"): @@ -4287,9 +4280,7 @@ def _clear_bytecode_cache(root: Path) -> int: ] if os.path.basename(dirpath) == "__pycache__": try: - import shutil as _shutil - - _shutil.rmtree(dirpath) + shutil.rmtree(dirpath) removed += 1 except OSError: pass @@ -4361,7 +4352,6 @@ def _build_web_ui(web_dir: Path, *, fatal: bool = False) -> bool: """ if not (web_dir / "package.json").exists(): return True - import shutil npm = shutil.which("npm") if not npm: @@ -4398,7 +4388,6 @@ def _update_via_zip(args): Used on Windows when git file I/O is broken (antivirus, NTFS filter drivers causing 'Invalid argument' errors on file creation). """ - import shutil import tempfile import zipfile from urllib.request import urlretrieve @@ -4475,7 +4464,6 @@ def _update_via_zip(args): # breaks on this machine, keep base deps and reinstall the remaining extras # individually so update does not silently strip working capabilities. print("→ Updating Python dependencies...") - import subprocess uv_bin = shutil.which("uv") if uv_bin: @@ -8078,7 +8066,6 @@ Examples: return line = _json.dumps(data, ensure_ascii=False) + "\n" if args.output == "-": - import sys sys.stdout.write(line) else: @@ -8088,7 +8075,6 @@ Examples: else: sessions = db.export_all(source=args.source) if args.output == "-": - import sys for s in sessions: sys.stdout.write(_json.dumps(s, ensure_ascii=False) + "\n") diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 046df3519d..e8772d246d 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -515,8 +515,6 @@ def check_nous_free_tier() -> bool: Returns False (assume paid) on any error — never blocks paying users. """ global _free_tier_cache - import time - now = time.monotonic() if _free_tier_cache is not None: cached_result, cached_at = _free_tier_cache @@ -1259,7 +1257,6 @@ def detect_provider_for_model( from hermes_cli.auth import PROVIDER_REGISTRY pconfig = PROVIDER_REGISTRY.get(direct_match) if pconfig: - import os for env_var in pconfig.api_key_env_vars: if os.getenv(env_var, "").strip(): has_creds = True diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 53b0c180aa..9fcc538c75 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -849,7 +849,6 @@ def setup_model_provider(config: dict, *, quick: bool = False): def _check_espeak_ng() -> bool: """Check if espeak-ng is installed.""" - import shutil return shutil.which("espeak-ng") is not None or shutil.which("espeak") is not None @@ -1084,8 +1083,6 @@ def setup_tts(config: dict): def setup_terminal_backend(config: dict): """Configure the terminal execution backend.""" import platform as _platform - import shutil - print_header("Terminal Backend") print_info("Choose where Hermes runs shell commands and code.") print_info("This affects tool execution, file access, and isolation.") diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 93169f416a..fe6b979e44 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -2324,12 +2324,10 @@ def start_server( ) if open_browser: - import threading import webbrowser def _open(): - import time as _t - _t.sleep(1.0) + time.sleep(1.0) webbrowser.open(f"http://{host}:{port}") threading.Thread(target=_open, daemon=True).start() diff --git a/run_agent.py b/run_agent.py index e69d30ff2c..e03e285c70 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1088,8 +1088,7 @@ class AIAgent: _is_bedrock_anthropic = self.provider == "bedrock" if _is_bedrock_anthropic: from agent.anthropic_adapter import build_anthropic_bedrock_client - import re as _re - _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _region_match = re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") _br_region = _region_match.group(1) if _region_match else "us-east-1" self._bedrock_region = _br_region self._anthropic_client = build_anthropic_bedrock_client(_br_region) @@ -1130,8 +1129,7 @@ class AIAgent: elif self.api_mode == "bedrock_converse": # AWS Bedrock — uses boto3 directly, no OpenAI client needed. # Region is extracted from the base_url or defaults to us-east-1. - import re as _re - _region_match = _re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") + _region_match = re.search(r"bedrock-runtime\.([a-z0-9-]+)\.", base_url or "") self._bedrock_region = _region_match.group(1) if _region_match else "us-east-1" # Guardrail config — read from config.yaml at init time. self._bedrock_guardrail_config = None @@ -1576,7 +1574,6 @@ class AIAgent: "Falling back to auto-detection.", _config_context_length, ) - import sys print( f"\n⚠ Invalid model.context_length in config.yaml: {_config_context_length!r}\n" f" Must be a plain integer (e.g. 256000, not '256K').\n" @@ -1618,7 +1615,6 @@ class AIAgent: "Falling back to auto-detection.", self.model, _cp_ctx, ) - import sys print( f"\n⚠ Invalid context_length for model {self.model!r} in custom_providers: {_cp_ctx!r}\n" f" Must be a plain integer (e.g. 256000, not '256K').\n" @@ -1881,8 +1877,6 @@ class AIAgent: change persists across turns (unlike fallback which is turn-scoped). """ - import logging - import re as _re from hermes_cli.providers import determine_api_mode # ── Determine api_mode if not provided ── @@ -1900,7 +1894,7 @@ class AIAgent: and isinstance(base_url, str) and base_url ): - base_url = _re.sub(r"/v1/?$", "", base_url) + base_url = re.sub(r"/v1/?$", "", base_url) old_model = self.model old_provider = self.provider @@ -2916,7 +2910,7 @@ class AIAgent: role = msg.get("role", "unknown") content = msg.get("content") tool_calls_data = None - if hasattr(msg, "tool_calls") and msg.tool_calls: + if hasattr(msg, "tool_calls") and isinstance(msg.tool_calls, list) and msg.tool_calls: tool_calls_data = [ {"name": tc.function.name, "arguments": tc.function.arguments} for tc in msg.tool_calls @@ -3182,15 +3176,14 @@ class AIAgent: tag instead of dumping raw HTML. Falls back to a truncated str(error) for everything else. """ - import re as _re raw = str(error) # Cloudflare / proxy HTML pages: grab the <title> for a clean summary if "<!DOCTYPE" in raw or "<html" in raw: - m = _re.search(r"<title[^>]*>([^<]+)", raw, _re.IGNORECASE) + m = re.search(r"]*>([^<]+)", raw, re.IGNORECASE) title = m.group(1).strip() if m else "HTML error page (title not found)" # Also grab Cloudflare Ray ID if present - ray = _re.search(r"Cloudflare Ray ID:\s*]*>([^<]+)", raw) + ray = re.search(r"Cloudflare Ray ID:\s*]*>([^<]+)", raw) ray_id = ray.group(1).strip() if ray else None status_code = getattr(error, "status_code", None) parts = [] diff --git a/tests/gateway/test_api_server_jobs.py b/tests/gateway/test_api_server_jobs.py index 6c17bb120b..a147657838 100644 --- a/tests/gateway/test_api_server_jobs.py +++ b/tests/gateway/test_api_server_jobs.py @@ -20,6 +20,8 @@ from aiohttp.test_utils import TestClient, TestServer from gateway.config import PlatformConfig from gateway.platforms.api_server import APIServerAdapter, cors_middleware +_MOD = "gateway.platforms.api_server" + # --------------------------------------------------------------------------- # Helpers @@ -83,10 +85,10 @@ class TestListJobs: """GET /api/jobs returns job list.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_list", return_value=[SAMPLE_JOB] + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_list", return_value=[SAMPLE_JOB] ): resp = await cli.get("/api/jobs") assert resp.status == 200 @@ -104,10 +106,10 @@ class TestListJobs: app = _create_app(adapter) mock_list = MagicMock(return_value=[SAMPLE_JOB]) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_list", mock_list + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_list", mock_list ): resp = await cli.get("/api/jobs?include_disabled=true") assert resp.status == 200 @@ -119,10 +121,10 @@ class TestListJobs: app = _create_app(adapter) mock_list = MagicMock(return_value=[]) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_list", mock_list + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_list", mock_list ): resp = await cli.get("/api/jobs") assert resp.status == 200 @@ -140,10 +142,10 @@ class TestCreateJob: app = _create_app(adapter) mock_create = MagicMock(return_value=SAMPLE_JOB) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_create", mock_create + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_create", mock_create ): resp = await cli.post("/api/jobs", json={ "name": "test-job", @@ -164,7 +166,7 @@ class TestCreateJob: """POST /api/jobs without name returns 400.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.post("/api/jobs", json={ "schedule": "*/5 * * * *", "prompt": "do something", @@ -178,7 +180,7 @@ class TestCreateJob: """POST /api/jobs with name > 200 chars returns 400.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.post("/api/jobs", json={ "name": "x" * 201, "schedule": "*/5 * * * *", @@ -192,7 +194,7 @@ class TestCreateJob: """POST /api/jobs with prompt > 5000 chars returns 400.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.post("/api/jobs", json={ "name": "test-job", "schedule": "*/5 * * * *", @@ -207,7 +209,7 @@ class TestCreateJob: """POST /api/jobs with repeat=0 returns 400.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.post("/api/jobs", json={ "name": "test-job", "schedule": "*/5 * * * *", @@ -222,7 +224,7 @@ class TestCreateJob: """POST /api/jobs without schedule returns 400.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.post("/api/jobs", json={ "name": "test-job", }) @@ -242,10 +244,10 @@ class TestGetJob: app = _create_app(adapter) mock_get = MagicMock(return_value=SAMPLE_JOB) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_get", mock_get + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_get", mock_get ): resp = await cli.get(f"/api/jobs/{VALID_JOB_ID}") assert resp.status == 200 @@ -259,10 +261,10 @@ class TestGetJob: app = _create_app(adapter) mock_get = MagicMock(return_value=None) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_get", mock_get + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_get", mock_get ): resp = await cli.get(f"/api/jobs/{VALID_JOB_ID}") assert resp.status == 404 @@ -272,7 +274,7 @@ class TestGetJob: """GET /api/jobs/{id} with non-hex id returns 400.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.get("/api/jobs/not-a-valid-hex!") assert resp.status == 400 data = await resp.json() @@ -291,10 +293,10 @@ class TestUpdateJob: updated_job = {**SAMPLE_JOB, "name": "updated-name"} mock_update = MagicMock(return_value=updated_job) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_update", mock_update + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_update", mock_update ): resp = await cli.patch( f"/api/jobs/{VALID_JOB_ID}", @@ -317,10 +319,10 @@ class TestUpdateJob: updated_job = {**SAMPLE_JOB, "name": "new-name"} mock_update = MagicMock(return_value=updated_job) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_update", mock_update + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_update", mock_update ): resp = await cli.patch( f"/api/jobs/{VALID_JOB_ID}", @@ -342,7 +344,7 @@ class TestUpdateJob: """PATCH /api/jobs/{id} with only unknown fields returns 400.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.patch( f"/api/jobs/{VALID_JOB_ID}", json={"evil_field": "malicious"}, @@ -363,10 +365,10 @@ class TestDeleteJob: app = _create_app(adapter) mock_remove = MagicMock(return_value=True) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_remove", mock_remove + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_remove", mock_remove ): resp = await cli.delete(f"/api/jobs/{VALID_JOB_ID}") assert resp.status == 200 @@ -380,10 +382,10 @@ class TestDeleteJob: app = _create_app(adapter) mock_remove = MagicMock(return_value=False) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_remove", mock_remove + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_remove", mock_remove ): resp = await cli.delete(f"/api/jobs/{VALID_JOB_ID}") assert resp.status == 404 @@ -401,10 +403,10 @@ class TestPauseJob: paused_job = {**SAMPLE_JOB, "enabled": False} mock_pause = MagicMock(return_value=paused_job) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_pause", mock_pause + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_pause", mock_pause ): resp = await cli.post(f"/api/jobs/{VALID_JOB_ID}/pause") assert resp.status == 200 @@ -426,10 +428,10 @@ class TestResumeJob: resumed_job = {**SAMPLE_JOB, "enabled": True} mock_resume = MagicMock(return_value=resumed_job) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_resume", mock_resume + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_resume", mock_resume ): resp = await cli.post(f"/api/jobs/{VALID_JOB_ID}/resume") assert resp.status == 200 @@ -451,10 +453,10 @@ class TestRunJob: triggered_job = {**SAMPLE_JOB, "last_run": "2025-01-01T00:00:00Z"} mock_trigger = MagicMock(return_value=triggered_job) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_trigger", mock_trigger + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_trigger", mock_trigger ): resp = await cli.post(f"/api/jobs/{VALID_JOB_ID}/run") assert resp.status == 200 @@ -473,7 +475,7 @@ class TestAuthRequired: """GET /api/jobs without API key returns 401 when key is set.""" app = _create_app(auth_adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.get("/api/jobs") assert resp.status == 401 @@ -482,7 +484,7 @@ class TestAuthRequired: """POST /api/jobs without API key returns 401 when key is set.""" app = _create_app(auth_adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.post("/api/jobs", json={ "name": "test", "schedule": "* * * * *", }) @@ -493,7 +495,7 @@ class TestAuthRequired: """GET /api/jobs/{id} without API key returns 401 when key is set.""" app = _create_app(auth_adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.get(f"/api/jobs/{VALID_JOB_ID}") assert resp.status == 401 @@ -502,7 +504,7 @@ class TestAuthRequired: """DELETE /api/jobs/{id} without API key returns 401.""" app = _create_app(auth_adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True): + with patch(f"{_MOD}._CRON_AVAILABLE", True): resp = await cli.delete(f"/api/jobs/{VALID_JOB_ID}") assert resp.status == 401 @@ -512,10 +514,10 @@ class TestAuthRequired: app = _create_app(auth_adapter) mock_list = MagicMock(return_value=[]) async with TestClient(TestServer(app)) as cli: - with patch.object( - APIServerAdapter, "_CRON_AVAILABLE", True - ), patch.object( - APIServerAdapter, "_cron_list", mock_list + with patch( + f"{_MOD}._CRON_AVAILABLE", True + ), patch( + f"{_MOD}._cron_list", mock_list ): resp = await cli.get( "/api/jobs", @@ -534,7 +536,7 @@ class TestCronUnavailable: """GET /api/jobs returns 501 when _CRON_AVAILABLE is False.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", False): + with patch(f"{_MOD}._CRON_AVAILABLE", False): resp = await cli.get("/api/jobs") assert resp.status == 501 data = await resp.json() @@ -551,8 +553,8 @@ class TestCronUnavailable: return SAMPLE_JOB async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True), patch.object( - APIServerAdapter, "_cron_pause", staticmethod(_plain_pause) + with patch(f"{_MOD}._CRON_AVAILABLE", True), patch( + f"{_MOD}._cron_pause", _plain_pause ): resp = await cli.post(f"/api/jobs/{VALID_JOB_ID}/pause") assert resp.status == 200 @@ -571,8 +573,8 @@ class TestCronUnavailable: return [SAMPLE_JOB] async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True), patch.object( - APIServerAdapter, "_cron_list", staticmethod(_plain_list) + with patch(f"{_MOD}._CRON_AVAILABLE", True), patch( + f"{_MOD}._cron_list", _plain_list ): resp = await cli.get("/api/jobs?include_disabled=true") assert resp.status == 200 @@ -593,8 +595,8 @@ class TestCronUnavailable: return updated_job async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", True), patch.object( - APIServerAdapter, "_cron_update", staticmethod(_plain_update) + with patch(f"{_MOD}._CRON_AVAILABLE", True), patch( + f"{_MOD}._cron_update", _plain_update ): resp = await cli.patch( f"/api/jobs/{VALID_JOB_ID}", @@ -611,7 +613,7 @@ class TestCronUnavailable: """POST /api/jobs returns 501 when _CRON_AVAILABLE is False.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", False): + with patch(f"{_MOD}._CRON_AVAILABLE", False): resp = await cli.post("/api/jobs", json={ "name": "test", "schedule": "* * * * *", }) @@ -622,7 +624,7 @@ class TestCronUnavailable: """GET /api/jobs/{id} returns 501 when _CRON_AVAILABLE is False.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", False): + with patch(f"{_MOD}._CRON_AVAILABLE", False): resp = await cli.get(f"/api/jobs/{VALID_JOB_ID}") assert resp.status == 501 @@ -631,7 +633,7 @@ class TestCronUnavailable: """DELETE /api/jobs/{id} returns 501 when _CRON_AVAILABLE is False.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", False): + with patch(f"{_MOD}._CRON_AVAILABLE", False): resp = await cli.delete(f"/api/jobs/{VALID_JOB_ID}") assert resp.status == 501 @@ -640,7 +642,7 @@ class TestCronUnavailable: """POST /api/jobs/{id}/pause returns 501 when _CRON_AVAILABLE is False.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", False): + with patch(f"{_MOD}._CRON_AVAILABLE", False): resp = await cli.post(f"/api/jobs/{VALID_JOB_ID}/pause") assert resp.status == 501 @@ -649,7 +651,7 @@ class TestCronUnavailable: """POST /api/jobs/{id}/resume returns 501 when _CRON_AVAILABLE is False.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", False): + with patch(f"{_MOD}._CRON_AVAILABLE", False): resp = await cli.post(f"/api/jobs/{VALID_JOB_ID}/resume") assert resp.status == 501 @@ -658,6 +660,6 @@ class TestCronUnavailable: """POST /api/jobs/{id}/run returns 501 when _CRON_AVAILABLE is False.""" app = _create_app(adapter) async with TestClient(TestServer(app)) as cli: - with patch.object(APIServerAdapter, "_CRON_AVAILABLE", False): + with patch(f"{_MOD}._CRON_AVAILABLE", False): resp = await cli.post(f"/api/jobs/{VALID_JOB_ID}/run") assert resp.status == 501 diff --git a/tools/browser_tool.py b/tools/browser_tool.py index 97427dc612..b19b220d1b 100644 --- a/tools/browser_tool.py +++ b/tools/browser_tool.py @@ -1911,7 +1911,6 @@ def _maybe_start_recording(task_id: str): recordings_dir.mkdir(parents=True, exist_ok=True) _cleanup_old_recordings(max_age_hours=72) - import time timestamp = time.strftime("%Y%m%d_%H%M%S") recording_path = recordings_dir / f"session_{timestamp}_{task_id[:16]}.webm" @@ -2027,8 +2026,6 @@ def browser_vision(question: str, annotate: bool = False, task_id: Optional[str] import base64 import uuid as uuid_mod - from pathlib import Path - effective_task_id = task_id or "default" # Save screenshot to persistent location so it can be shared with users @@ -2210,7 +2207,6 @@ def _cleanup_old_screenshots(screenshots_dir, max_age_hours=24): def _cleanup_old_recordings(max_age_hours=72): """Remove browser recordings older than max_age_hours to prevent disk bloat.""" - import time try: hermes_home = get_hermes_home() recordings_dir = hermes_home / "browser_recordings" diff --git a/tools/checkpoint_manager.py b/tools/checkpoint_manager.py index 277a23e449..a3beee2a79 100644 --- a/tools/checkpoint_manager.py +++ b/tools/checkpoint_manager.py @@ -389,7 +389,6 @@ class CheckpointManager: @staticmethod def _parse_shortstat(stat_line: str, entry: Dict) -> None: """Parse git --shortstat output into entry dict.""" - import re m = re.search(r'(\d+) file', stat_line) if m: entry["files_changed"] = int(m.group(1)) diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index e5e856d0bb..a0a22773e5 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1540,7 +1540,6 @@ def _interrupted_call_result() -> str: def _interpolate_env_vars(value): """Recursively resolve ``${VAR}`` placeholders from ``os.environ``.""" if isinstance(value, str): - import re def _replace(m): return os.environ.get(m.group(1), m.group(0)) return re.sub(r"\$\{([^}]+)\}", _replace, value) diff --git a/tools/process_registry.py b/tools/process_registry.py index 92f3db2a10..ec510cae04 100644 --- a/tools/process_registry.py +++ b/tools/process_registry.py @@ -1167,32 +1167,31 @@ PROCESS_SCHEMA = { def _handle_process(args, **kw): - import json as _json task_id = kw.get("task_id") action = args.get("action", "") # Coerce to string — some models send session_id as an integer session_id = str(args.get("session_id", "")) if args.get("session_id") is not None else "" if action == "list": - return _json.dumps({"processes": process_registry.list_sessions(task_id=task_id)}, ensure_ascii=False) + return json.dumps({"processes": process_registry.list_sessions(task_id=task_id)}, ensure_ascii=False) elif action in ("poll", "log", "wait", "kill", "write", "submit", "close"): if not session_id: return tool_error(f"session_id is required for {action}") if action == "poll": - return _json.dumps(process_registry.poll(session_id), ensure_ascii=False) + return json.dumps(process_registry.poll(session_id), ensure_ascii=False) elif action == "log": - return _json.dumps(process_registry.read_log( + return json.dumps(process_registry.read_log( session_id, offset=args.get("offset", 0), limit=args.get("limit", 200)), ensure_ascii=False) elif action == "wait": - return _json.dumps(process_registry.wait(session_id, timeout=args.get("timeout")), ensure_ascii=False) + return json.dumps(process_registry.wait(session_id, timeout=args.get("timeout")), ensure_ascii=False) elif action == "kill": - return _json.dumps(process_registry.kill_process(session_id), ensure_ascii=False) + return json.dumps(process_registry.kill_process(session_id), ensure_ascii=False) elif action == "write": - return _json.dumps(process_registry.write_stdin(session_id, str(args.get("data", ""))), ensure_ascii=False) + return json.dumps(process_registry.write_stdin(session_id, str(args.get("data", ""))), ensure_ascii=False) elif action == "submit": - return _json.dumps(process_registry.submit_stdin(session_id, str(args.get("data", ""))), ensure_ascii=False) + return json.dumps(process_registry.submit_stdin(session_id, str(args.get("data", ""))), ensure_ascii=False) elif action == "close": - return _json.dumps(process_registry.close_stdin(session_id), ensure_ascii=False) + return json.dumps(process_registry.close_stdin(session_id), ensure_ascii=False) return tool_error(f"Unknown process action: {action}. Use: list, poll, log, wait, kill, write, submit, close") diff --git a/tools/skills_tool.py b/tools/skills_tool.py index ed8c8cfb08..f5ab9eeacd 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -509,7 +509,6 @@ def _get_disabled_skill_names() -> Set[str]: def _is_skill_disabled(name: str, platform: str = None) -> bool: """Check if a skill is disabled in config.""" - import os try: from hermes_cli.config import load_config config = load_config() diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 732b50b14e..7a7dc9c1a6 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -217,7 +217,6 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str: directly from /dev/tty with echo disabled. """ import sys - import time as time_module # Use the registered callback when available (prompt_toolkit-compatible) if _sudo_password_callback is not None: @@ -278,7 +277,7 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str: try: os.environ["HERMES_SPINNER_PAUSE"] = "1" - time_module.sleep(0.2) + time.sleep(0.2) print() print("┌" + "─" * 58 + "┐") diff --git a/uv.lock b/uv.lock index 133bd3f782..33b5c6628a 100644 --- a/uv.lock +++ b/uv.lock @@ -426,7 +426,7 @@ wheels = [ [[package]] name = "atroposlib" version = "0.4.0" -source = { git = "https://github.com/NousResearch/atropos.git#c421582b6f7ce8a32f751aab3117d3824ac8f709" } +source = { git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30#c20c85256e5a45ad31edf8b7276e9c5ee1995a30" } dependencies = [ { name = "aiofiles" }, { name = "aiohttp" }, @@ -558,6 +558,34 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/cb/f2ad4230dc2eb1a74edf38f1a38b9b52277f75bef262d8908e60d957e13c/blinker-1.9.0-py3-none-any.whl", hash = "sha256:ba0efaa9080b619ff2f3459d1d500c57bddea4a6b424b60a91141db6fd2f08bc", size = 8458, upload-time = "2024-11-08T17:25:46.184Z" }, ] +[[package]] +name = "boto3" +version = "1.42.92" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, + { name = "jmespath" }, + { name = "s3transfer" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e7/3b/84cafa37e85a57618554bd2bc21bd569417097f45f18c23ef488e6c69683/boto3-1.42.92.tar.gz", hash = "sha256:55ec6ef6fc81f46d567a7d1d398d1e5c375d468905d0ccd9e1f767f0c77dbe9b", size = 113207, upload-time = "2026-04-20T19:38:17.293Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8f/8f/350ffd50aaa515429464deb1dc85893a21a64cb41892feb6b22ce87304ad/boto3-1.42.92-py3-none-any.whl", hash = "sha256:c90d9a170faa0585755fa103a3cd9595e1f53443864e902c180f3d8177589125", size = 140555, upload-time = "2026-04-20T19:38:14.323Z" }, +] + +[[package]] +name = "botocore" +version = "1.42.92" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "jmespath" }, + { name = "python-dateutil" }, + { name = "urllib3" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d5/0a/6785ce224ba4483b3e1282d959e1dd2c2898823336f013464c43cb154036/botocore-1.42.92.tar.gz", hash = "sha256:f1193d3057a2d0267353d7ef4e136be37ea432336d097fcb1951fae566ca3a22", size = 15235239, upload-time = "2026-04-20T19:38:05.085Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/32/b8/41d4d7ba75a4fb4f11362e96371a12695bc6ba0bb7cc680137db0213f97e/botocore-1.42.92-py3-none-any.whl", hash = "sha256:09ddefddbb1565ceef4b44b4b6e61b1ca5f12701d1494ecc85c1133d1b1e81fb", size = 14916275, upload-time = "2026-04-20T19:38:01.684Z" }, +] + [[package]] name = "cachetools" version = "5.5.2" @@ -1838,7 +1866,7 @@ wheels = [ [[package]] name = "hermes-agent" -version = "0.9.0" +version = "0.10.0" source = { editable = "." } dependencies = [ { name = "anthropic" }, @@ -1871,6 +1899,7 @@ all = [ { name = "aiosqlite", marker = "sys_platform == 'linux'" }, { name = "alibabacloud-dingtalk" }, { name = "asyncpg", marker = "sys_platform == 'linux'" }, + { name = "boto3" }, { name = "croniter" }, { name = "daytona" }, { name = "debugpy" }, @@ -1893,12 +1922,16 @@ all = [ { name = "pytest-xdist" }, { name = "python-telegram-bot", extra = ["webhooks"] }, { name = "pywinpty", marker = "sys_platform == 'win32'" }, + { name = "qrcode" }, { name = "simple-term-menu" }, { name = "slack-bolt" }, { name = "slack-sdk" }, { name = "sounddevice" }, { name = "uvicorn", extra = ["standard"] }, ] +bedrock = [ + { name = "boto3" }, +] cli = [ { name = "simple-term-menu" }, ] @@ -1918,9 +1951,11 @@ dev = [ dingtalk = [ { name = "alibabacloud-dingtalk" }, { name = "dingtalk-stream" }, + { name = "qrcode" }, ] feishu = [ { name = "lark-oapi" }, + { name = "qrcode" }, ] homeassistant = [ { name = "aiohttp" }, @@ -1941,6 +1976,7 @@ messaging = [ { name = "aiohttp" }, { name = "discord-py", extra = ["voice"] }, { name = "python-telegram-bot", extra = ["webhooks"] }, + { name = "qrcode" }, { name = "slack-bolt" }, { name = "slack-sdk" }, ] @@ -1974,6 +2010,7 @@ termux = [ { name = "honcho-ai" }, { name = "mcp" }, { name = "ptyprocess", marker = "sys_platform != 'win32'" }, + { name = "python-telegram-bot", extra = ["webhooks"] }, { name = "pywinpty", marker = "sys_platform == 'win32'" }, { name = "simple-term-menu" }, ] @@ -2003,7 +2040,8 @@ requires-dist = [ { name = "alibabacloud-dingtalk", marker = "extra == 'dingtalk'", specifier = ">=2.0.0" }, { name = "anthropic", specifier = ">=0.39.0,<1" }, { name = "asyncpg", marker = "extra == 'matrix'", specifier = ">=0.29" }, - { name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git" }, + { name = "atroposlib", marker = "extra == 'rl'", git = "https://github.com/NousResearch/atropos.git?rev=c20c85256e5a45ad31edf8b7276e9c5ee1995a30" }, + { name = "boto3", marker = "extra == 'bedrock'", specifier = ">=1.35.0,<2" }, { name = "croniter", marker = "extra == 'cron'", specifier = ">=6.0.0,<7" }, { name = "daytona", marker = "extra == 'daytona'", specifier = ">=0.148.0,<1" }, { name = "debugpy", marker = "extra == 'dev'", specifier = ">=1.8.0,<2" }, @@ -2020,6 +2058,7 @@ requires-dist = [ { name = "firecrawl-py", specifier = ">=4.16.0,<5" }, { name = "hermes-agent", extras = ["acp"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["acp"], marker = "extra == 'termux'" }, + { name = "hermes-agent", extras = ["bedrock"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["cli"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["cli"], marker = "extra == 'termux'" }, { name = "hermes-agent", extras = ["cron"], marker = "extra == 'all'" }, @@ -2066,8 +2105,12 @@ requires-dist = [ { name = "pytest-xdist", marker = "extra == 'dev'", specifier = ">=3.0,<4" }, { name = "python-dotenv", specifier = ">=1.2.1,<2" }, { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'messaging'", specifier = ">=22.6,<23" }, + { name = "python-telegram-bot", extras = ["webhooks"], marker = "extra == 'termux'", specifier = ">=22.6,<23" }, { name = "pywinpty", marker = "sys_platform == 'win32' and extra == 'pty'", specifier = ">=2.0.0,<3" }, { name = "pyyaml", specifier = ">=6.0.2,<7" }, + { name = "qrcode", marker = "extra == 'dingtalk'", specifier = ">=7.0,<8" }, + { name = "qrcode", marker = "extra == 'feishu'", specifier = ">=7.0,<8" }, + { name = "qrcode", marker = "extra == 'messaging'", specifier = ">=7.0,<8" }, { name = "requests", specifier = ">=2.33.0,<3" }, { name = "rich", specifier = ">=14.3.3,<15" }, { name = "simple-term-menu", marker = "extra == 'cli'", specifier = ">=1.0,<2" }, @@ -2077,13 +2120,13 @@ requires-dist = [ { name = "slack-sdk", marker = "extra == 'slack'", specifier = ">=3.27.0,<4" }, { name = "sounddevice", marker = "extra == 'voice'", specifier = ">=0.4.6,<1" }, { name = "tenacity", specifier = ">=9.1.4,<10" }, - { name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git" }, + { name = "tinker", marker = "extra == 'rl'", git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b" }, { name = "uvicorn", extras = ["standard"], marker = "extra == 'rl'", specifier = ">=0.24.0,<1" }, { name = "uvicorn", extras = ["standard"], marker = "extra == 'web'", specifier = ">=0.24.0,<1" }, { name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" }, - { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git" }, + { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c" }, ] -provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "termux", "dingtalk", "feishu", "web", "rl", "yc-bench", "all"] +provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "bedrock", "termux", "dingtalk", "feishu", "web", "rl", "yc-bench", "all"] [[package]] name = "hf-transfer" @@ -2410,6 +2453,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/67/8a/a342b2f0251f3dac4ca17618265d93bf244a2a4d089126e81e4c1056ac50/jiter-0.13.0-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7bb00b6d26db67a05fe3e12c76edc75f32077fb51deed13822dc648fa373bc19", size = 343768, upload-time = "2026-02-02T12:37:55.055Z" }, ] +[[package]] +name = "jmespath" +version = "1.1.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d3/59/322338183ecda247fb5d1763a6cbe46eff7222eaeebafd9fa65d4bf5cb11/jmespath-1.1.0.tar.gz", hash = "sha256:472c87d80f36026ae83c6ddd0f1d05d4e510134ed462851fd5f754c8c3cbb88d", size = 27377, upload-time = "2026-01-22T16:35:26.279Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/2f/967ba146e6d58cf6a652da73885f52fc68001525b4197effc174321d70b4/jmespath-1.1.0-py3-none-any.whl", hash = "sha256:a5663118de4908c91729bea0acadca56526eb2698e83de10cd116ae0f4e97c64", size = 20419, upload-time = "2026-01-22T16:35:24.919Z" }, +] + [[package]] name = "joblib" version = "1.5.3" @@ -4109,6 +4161,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/10/bd/c038d7cc38edc1aa5bf91ab8068b63d4308c66c4c8bb3cbba7dfbc049f9c/pyparsing-3.3.2-py3-none-any.whl", hash = "sha256:850ba148bd908d7e2411587e247a1e4f0327839c40e2e5e6d05a007ecc69911d", size = 122781, upload-time = "2026-01-21T03:57:55.912Z" }, ] +[[package]] +name = "pypng" +version = "0.20220715.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/93/cd/112f092ec27cca83e0516de0a3368dbd9128c187fb6b52aaaa7cde39c96d/pypng-0.20220715.0.tar.gz", hash = "sha256:739c433ba96f078315de54c0db975aee537cbc3e1d0ae4ed9aab0ca1e427e2c1", size = 128992, upload-time = "2022-07-15T14:11:05.301Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3e/b9/3766cc361d93edb2ce81e2e1f87dd98f314d7d513877a342d31b30741680/pypng-0.20220715.0-py3-none-any.whl", hash = "sha256:4a43e969b8f5aaafb2a415536c1a8ec7e341cd6a3f957fd5b5f32a4cfeed902c", size = 58057, upload-time = "2022-07-15T14:11:03.713Z" }, +] + [[package]] name = "pytest" version = "9.0.2" @@ -4311,6 +4372,20 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f1/12/de94a39c2ef588c7e6455cfbe7343d3b2dc9d6b6b2f40c4c6565744c873d/pyyaml-6.0.3-cp314-cp314t-win_arm64.whl", hash = "sha256:ebc55a14a21cb14062aa4162f906cd962b28e2e9ea38f9b4391244cd8de4ae0b", size = 149341, upload-time = "2025-09-25T21:32:56.828Z" }, ] +[[package]] +name = "qrcode" +version = "7.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "pypng" }, + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/30/35/ad6d4c5a547fe9a5baf85a9edbafff93fc6394b014fab30595877305fa59/qrcode-7.4.2.tar.gz", hash = "sha256:9dd969454827e127dbd93696b20747239e6d540e082937c90f14ac95b30f5845", size = 535974, upload-time = "2023-02-05T22:11:46.548Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/24/79/aaf0c1c7214f2632badb2771d770b1500d3d7cbdf2590ae62e721ec50584/qrcode-7.4.2-py3-none-any.whl", hash = "sha256:581dca7a029bcb2deef5d01068e39093e80ef00b4a61098a2182eac59d01643a", size = 46197, upload-time = "2023-02-05T22:11:43.4Z" }, +] + [[package]] name = "referencing" version = "0.37.0" @@ -4577,6 +4652,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d1/b7/b95708304cd49b7b6f82fdd039f1748b66ec2b21d6a45180910802f1abf1/rpds_py-0.30.0-pp311-pypy311_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:ac37f9f516c51e5753f27dfdef11a88330f04de2d564be3991384b2f3535d02e", size = 562191, upload-time = "2025-11-30T20:24:36.853Z" }, ] +[[package]] +name = "s3transfer" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "botocore" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/05/04/74127fc843314818edfa81b5540e26dd537353b123a4edc563109d8f17dd/s3transfer-0.16.0.tar.gz", hash = "sha256:8e990f13268025792229cd52fa10cb7163744bf56e719e0b9cb925ab79abf920", size = 153827, upload-time = "2025-12-01T02:30:59.114Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, +] + [[package]] name = "safetensors" version = "0.7.0" @@ -4927,8 +5014,8 @@ wheels = [ [[package]] name = "tinker" -version = "0.16.1" -source = { git = "https://github.com/thinking-machines-lab/tinker.git#07bd3c2dd3cd4398ac1c26f0ec0deccbf3c1f913" } +version = "0.18.0" +source = { git = "https://github.com/thinking-machines-lab/tinker.git?rev=30517b667f18a3dfb7ef33fb56cf686d5820ba2b#30517b667f18a3dfb7ef33fb56cf686d5820ba2b" } dependencies = [ { name = "anyio" }, { name = "click" }, @@ -5653,7 +5740,7 @@ wheels = [ [[package]] name = "yc-bench" version = "0.1.0" -source = { git = "https://github.com/collinear-ai/yc-bench.git#0c53c98f01a431db2e391482bc46013045854ab2" } +source = { git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c#bfb0c88062450f46341bd9a5298903fc2e952a5c" } dependencies = [ { name = "litellm", marker = "python_full_version >= '3.12'" }, { name = "matplotlib", marker = "python_full_version >= '3.12'" }, From 28b3f49aaaa69e8cf6225e9d4d35042a4890f777 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Tue, 21 Apr 2026 12:46:31 +0530 Subject: [PATCH 11/63] refactor: remove remaining redundant local imports (comprehensive sweep) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Full AST-based scan of all .py files to find every case where a module or name is imported locally inside a function body but is already available at module level. This is the second pass — the first commit handled the known cases from the lint report; this one catches everything else. Files changed (19): cli.py — 16 removals: time as _time/_t/_tmod (×10), re / re as _re (×2), os as _os, sys, partial os from combo import, from model_tools import get_tool_definitions gateway/run.py — 8 removals: MessageEvent as _ME / MessageType as _MT (×3), os as _os2, MessageEvent+MessageType (×2), Platform, BasePlatformAdapter as _BaseAdapter run_agent.py — 6 removals: get_hermes_home as _ghh, partial (contextlib, os as _os), cleanup_vm, cleanup_browser, set_interrupt as _sif (×2), partial get_toolset_for_tool hermes_cli/main.py — 4 removals: get_hermes_home, time as _time, logging as _log, shutil hermes_cli/config.py — 1 removal: get_hermes_home as _ghome hermes_cli/runtime_provider.py — 1 removal: load_config as _load_bedrock_config hermes_cli/setup.py — 2 removals: importlib.util (×2) hermes_cli/nous_subscription.py — 1 removal: from hermes_cli.config import load_config hermes_cli/tools_config.py — 1 removal: from hermes_cli.config import load_config, save_config cron/scheduler.py — 3 removals: concurrent.futures, json as _json, from hermes_cli.config import load_config batch_runner.py — 1 removal: list_distributions as get_all_dists (kept print_distribution_info, not at top level) tools/send_message_tool.py — 2 removals: import os (×2) tools/skills_tool.py — 1 removal: logging as _logging tools/browser_camofox.py — 1 removal: from hermes_cli.config import load_config tools/image_generation_tool.py — 1 removal: import fal_client environments/tool_context.py — 1 removal: concurrent.futures gateway/platforms/bluebubbles.py — 1 removal: httpx as _httpx gateway/platforms/whatsapp.py — 1 removal: import asyncio tui_gateway/server.py — 2 removals: from datetime import datetime, import time All alias references (_time, _t, _tmod, _re, _os, _os2, _json, _ghh, _ghome, _sif, _ME, _MT, _BaseAdapter, _load_bedrock_config, _httpx, _logging, _log, get_all_dists) updated to use the top-level names. --- batch_runner.py | 8 ++--- cli.py | 53 +++++++++++--------------------- cron/scheduler.py | 5 +-- environments/tool_context.py | 1 - gateway/platforms/bluebubbles.py | 2 +- gateway/platforms/whatsapp.py | 1 - gateway/run.py | 25 +++++---------- hermes_cli/config.py | 3 +- hermes_cli/main.py | 12 +------- hermes_cli/nous_subscription.py | 1 - hermes_cli/runtime_provider.py | 3 +- hermes_cli/setup.py | 2 -- hermes_cli/tools_config.py | 1 - run_agent.py | 17 ++++------ tools/browser_camofox.py | 1 - tools/image_generation_tool.py | 2 +- tools/send_message_tool.py | 2 -- tools/skills_tool.py | 3 +- tui_gateway/server.py | 2 -- 19 files changed, 43 insertions(+), 101 deletions(-) diff --git a/batch_runner.py b/batch_runner.py index c8f275a14f..7413ad59f4 100644 --- a/batch_runner.py +++ b/batch_runner.py @@ -1190,12 +1190,12 @@ def main( """ # Handle list distributions if list_distributions: - from toolset_distributions import list_distributions as get_all_dists, print_distribution_info - + from toolset_distributions import print_distribution_info + print("📊 Available Toolset Distributions") print("=" * 70) - - all_dists = get_all_dists() + + all_dists = list_distributions() for dist_name in sorted(all_dists.keys()): print_distribution_info(dist_name) diff --git a/cli.py b/cli.py index a045550dd7..b5dc6c7c19 100644 --- a/cli.py +++ b/cli.py @@ -7159,7 +7159,6 @@ class HermesCLI: # Refresh the agent's tool list so the model can call new tools if self.agent is not None: - from model_tools import get_tool_definitions self.agent.tools = get_tool_definitions( enabled_toolsets=self.agent.enabled_toolsets if hasattr(self.agent, "enabled_toolsets") else None, @@ -7242,7 +7241,6 @@ class HermesCLI: full history of tool calls (not just the current one in the spinner). """ if event_type == "tool.completed": - import time as _time self._tool_start_time = 0.0 # Print stacked scrollback line for "all" / "new" modes if function_name and self.tool_progress_mode in ("all", "new"): @@ -7271,7 +7269,6 @@ class HermesCLI: if event_type != "tool.started": return if function_name and not function_name.startswith("_"): - import time as _time from agent.display import get_tool_emoji emoji = get_tool_emoji(function_name) label = preview or function_name @@ -7280,7 +7277,7 @@ class HermesCLI: if _pl > 0 and len(label) > _pl: label = label[:_pl - 3] + "..." self._spinner_text = f"{emoji} {label}" - self._tool_start_time = _time.monotonic() + self._tool_start_time = time.monotonic() # Store args for stacked scrollback line on completion self._pending_tool_info.setdefault(function_name, []).append( function_args if function_args is not None else {} @@ -7538,7 +7535,6 @@ class HermesCLI: try: from tools.tts_tool import text_to_speech_tool from tools.voice_mode import play_audio_file - import re # Strip markdown and non-speech content for cleaner TTS tts_text = text[:4000] if len(text) > 4000 else text @@ -8374,8 +8370,7 @@ class HermesCLI: try: _dbg = _hermes_home / "interrupt_debug.log" with open(_dbg, "a") as _f: - import time as _t - _f.write(f"{_t.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, " + _f.write(f"{time.strftime('%H:%M:%S')} interrupt fired: msg={str(interrupt_msg)[:60]!r}, " f"children={len(self.agent._active_children)}, " f"parent._interrupt={self.agent._interrupt_requested}\n") for _ci, _ch in enumerate(self.agent._active_children): @@ -8451,9 +8446,8 @@ class HermesCLI: # buffer so tool/status lines render ABOVE our response box. # The flush pushes data into the renderer queue; the short # sleep lets the renderer actually paint it before we draw. - import time as _time sys.stdout.flush() - _time.sleep(0.15) + time.sleep(0.15) # Update history with full conversation self.conversation_history = result.get("messages", self.conversation_history) if result else self.conversation_history @@ -9119,8 +9113,7 @@ class HermesCLI: try: _dbg = _hermes_home / "interrupt_debug.log" with open(_dbg, "a") as _f: - import time as _t - _f.write(f"{_t.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " + _f.write(f"{time.strftime('%H:%M:%S')} ENTER: queued interrupt msg={str(payload)[:60]!r}, " f"agent_running={self._agent_running}\n") except Exception: pass @@ -9268,8 +9261,7 @@ class HermesCLI: 2. Interrupt the running agent (first press) 3. Force exit (second press within 2s, or when idle) """ - import time as _time - now = _time.time() + now = time.time() # Cancel active voice recording. # Run cancel() in a background thread to prevent blocking the @@ -9377,12 +9369,11 @@ class HermesCLI: @kb.add('c-z') def handle_ctrl_z(event): """Handle Ctrl+Z - suspend process to background (Unix only).""" - import sys if sys.platform == 'win32': _cprint(f"\n{_DIM}Suspend (Ctrl+Z) is not supported on Windows.{_RST}") event.app.invalidate() return - import os, signal as _sig + import signal as _sig from prompt_toolkit.application import run_in_terminal from hermes_cli.skin_engine import get_active_skin agent_name = get_active_skin().get_branding("agent_name", "Hermes Agent") @@ -9696,31 +9687,29 @@ class HermesCLI: # extra instructions (sudo countdown, approval navigation, clarify). # The agent-running interrupt hint is now an inline placeholder above. def get_hint_text(): - import time as _time - if cli_ref._sudo_state: - remaining = max(0, int(cli_ref._sudo_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._sudo_deadline - time.monotonic())) return [ ('class:hint', ' password hidden · Enter to skip'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._secret_state: - remaining = max(0, int(cli_ref._secret_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._secret_deadline - time.monotonic())) return [ ('class:hint', ' secret hidden · Enter to skip'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._approval_state: - remaining = max(0, int(cli_ref._approval_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._approval_deadline - time.monotonic())) return [ ('class:hint', ' ↑/↓ to select, Enter to confirm'), ('class:clarify-countdown', f' ({remaining}s)'), ] if cli_ref._clarify_state: - remaining = max(0, int(cli_ref._clarify_deadline - _time.monotonic())) + remaining = max(0, int(cli_ref._clarify_deadline - time.monotonic())) countdown = f' ({remaining}s)' if cli_ref._clarify_deadline else '' if cli_ref._clarify_freetext: return [ @@ -10268,22 +10257,20 @@ class HermesCLI: app._on_resize = _resize_clear_ghosts def spinner_loop(): - import time as _time - last_idle_refresh = 0.0 while not self._should_exit: if not self._app: - _time.sleep(0.1) + time.sleep(0.1) continue if self._command_running: self._invalidate(min_interval=0.1) - _time.sleep(0.1) + time.sleep(0.1) else: - now = _time.monotonic() + now = time.monotonic() if now - last_idle_refresh >= 1.0: last_idle_refresh = now self._invalidate(min_interval=1.0) - _time.sleep(0.2) + time.sleep(0.2) spinner_thread = threading.Thread(target=spinner_loop, daemon=True) spinner_thread.start() @@ -10352,8 +10339,7 @@ class HermesCLI: continue # Expand paste references back to full content - import re as _re - _paste_ref_re = _re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') + _paste_ref_re = re.compile(r'\[Pasted text #\d+: \d+ lines \u2192 (.+?)\]') paste_refs = list(_paste_ref_re.finditer(user_input)) if isinstance(user_input, str) else [] if paste_refs: user_input = self._expand_paste_references(user_input) @@ -10445,13 +10431,12 @@ class HermesCLI: try: if getattr(self, "agent", None) and getattr(self, "_agent_running", False): self.agent.interrupt(f"received signal {signum}") - import time as _t try: _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) except (TypeError, ValueError): _grace = 1.5 if _grace > 0: - _t.sleep(_grace) + time.sleep(_grace) except Exception: pass # never block signal handling raise KeyboardInterrupt() @@ -10484,8 +10469,7 @@ class HermesCLI: # uv-managed Python, fd 0 can be invalid or unregisterable with the # asyncio selector, causing "KeyError: '0 is not registered'" (#6393). try: - import os as _os - _os.fstat(0) + os.fstat(0) except OSError: print( "Error: stdin (fd 0) is not available.\n" @@ -10778,13 +10762,12 @@ def main( _agent = getattr(cli, "agent", None) if _agent is not None: _agent.interrupt(f"received signal {signum}") - import time as _t try: _grace = float(os.getenv("HERMES_SIGTERM_GRACE", "1.5")) except (TypeError, ValueError): _grace = 1.5 if _grace > 0: - _t.sleep(_grace) + time.sleep(_grace) except Exception: pass # never block signal handling raise KeyboardInterrupt() diff --git a/cron/scheduler.py b/cron/scheduler.py index 4b131859b2..881132006b 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -422,7 +422,6 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option # prevent "coroutine was never awaited" RuntimeWarning, then retry in a # fresh thread that has no running loop. coro.close() - import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: future = pool.submit(asyncio.run, _send_to_platform(platform, pconfig, chat_id, cleaned_delivery_content, thread_id=thread_id, media_files=media_files)) result = future.result(timeout=30) @@ -810,14 +809,13 @@ def run_job(job: dict) -> tuple[bool, str, str, Optional[str]]: prefill_messages = None prefill_file = os.getenv("HERMES_PREFILL_MESSAGES_FILE", "") or _cfg.get("prefill_messages_file", "") if prefill_file: - import json as _json pfpath = Path(prefill_file).expanduser() if not pfpath.is_absolute(): pfpath = _hermes_home / pfpath if pfpath.exists(): try: with open(pfpath, "r", encoding="utf-8") as _pf: - prefill_messages = _json.load(_pf) + prefill_messages = json.load(_pf) if not isinstance(prefill_messages, list): prefill_messages = None except Exception as e: @@ -1085,7 +1083,6 @@ def tick(verbose: bool = True, adapters=None, loop=None) -> int: logger.warning("Invalid HERMES_CRON_MAX_PARALLEL value; defaulting to unbounded") if _max_workers is None: try: - from hermes_cli.config import load_config _ucfg = load_config() or {} _cfg_par = ( _ucfg.get("cron", {}) if isinstance(_ucfg, dict) else {} diff --git a/environments/tool_context.py b/environments/tool_context.py index 10f537d724..550c5e851c 100644 --- a/environments/tool_context.py +++ b/environments/tool_context.py @@ -53,7 +53,6 @@ def _run_tool_in_thread(tool_name: str, arguments: Dict[str, Any], task_id: str) try: loop = asyncio.get_running_loop() # We're in an async context -- need to run in thread - import concurrent.futures with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool: future = pool.submit( handle_function_call, tool_name, arguments, task_id diff --git a/gateway/platforms/bluebubbles.py b/gateway/platforms/bluebubbles.py index a8a2929698..39d4e537eb 100644 --- a/gateway/platforms/bluebubbles.py +++ b/gateway/platforms/bluebubbles.py @@ -75,7 +75,7 @@ def _redact(text: str) -> str: def check_bluebubbles_requirements() -> bool: try: import aiohttp # noqa: F401 - import httpx as _httpx # noqa: F401 + import httpx # noqa: F401 except ImportError: return False return True diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index 767908023e..a82417a601 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -399,7 +399,6 @@ class WhatsAppAdapter(BasePlatformAdapter): # Check if bridge is already running and connected import aiohttp - import asyncio try: async with aiohttp.ClientSession() as session: async with session.get( diff --git a/gateway/run.py b/gateway/run.py index 647027003c..785368cffe 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3274,10 +3274,9 @@ class GatewayRunner: return "Usage: /queue " adapter = self.adapters.get(source.platform) if adapter: - from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT - queued_event = _ME( + queued_event = MessageEvent( text=queued_text, - message_type=_MT.TEXT, + message_type=MessageType.TEXT, source=event.source, message_id=event.message_id, channel_prompt=event.channel_prompt, @@ -3299,10 +3298,9 @@ class GatewayRunner: # Agent hasn't started yet — queue as turn-boundary fallback. adapter = self.adapters.get(source.platform) if adapter: - from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT - queued_event = _ME( + queued_event = MessageEvent( text=steer_text, - message_type=_MT.TEXT, + message_type=MessageType.TEXT, source=event.source, message_id=event.message_id, channel_prompt=event.channel_prompt, @@ -3322,10 +3320,9 @@ class GatewayRunner: # Running agent is missing or lacks steer() — fall back to queue. adapter = self.adapters.get(source.platform) if adapter: - from gateway.platforms.base import MessageEvent as _ME, MessageType as _MT - queued_event = _ME( + queued_event = MessageEvent( text=steer_text, - message_type=_MT.TEXT, + message_type=MessageType.TEXT, source=event.source, message_id=event.message_id, channel_prompt=event.channel_prompt, @@ -3857,9 +3854,7 @@ class GatewayRunner: for i, path in enumerate(event.media_urls): mtype = event.media_types[i] if i < len(event.media_types) else "" if mtype in ("", "application/octet-stream"): - import os as _os2 - - _ext = _os2.path.splitext(path)[1].lower() + _ext = os.path.splitext(path)[1].lower() if _ext in _TEXT_EXTENSIONS: mtype = "text/plain" else: @@ -8302,7 +8297,6 @@ class GatewayRunner: if not adapter: return try: - from gateway.platforms.base import MessageEvent, MessageType synth_event = MessageEvent( text=synth_text, message_type=MessageType.TEXT, @@ -8407,7 +8401,6 @@ class GatewayRunner: break if adapter and source.chat_id: try: - from gateway.platforms.base import MessageEvent, MessageType synth_event = MessageEvent( text=synth_text, message_type=MessageType.TEXT, @@ -8929,7 +8922,6 @@ class GatewayRunner: if _streaming_enabled: try: from gateway.stream_consumer import GatewayStreamConsumer, StreamConsumerConfig - from gateway.config import Platform _adapter = self.adapters.get(source.platform) if _adapter: _adapter_supports_edit = getattr(_adapter, "SUPPORTS_MESSAGE_EDITING", True) @@ -9279,8 +9271,7 @@ class GatewayRunner: # Skip tool progress for platforms that don't support message # editing (e.g. iMessage/BlueBubbles) — each progress update # would become a separate message bubble, which is noisy. - from gateway.platforms.base import BasePlatformAdapter as _BaseAdapter - if type(adapter).edit_message is _BaseAdapter.edit_message: + if type(adapter).edit_message is BasePlatformAdapter.edit_message: while not progress_queue.empty(): try: progress_queue.get_nowait() diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 255721482a..a87d1d23c9 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -2605,8 +2605,7 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A # Scan ``$HERMES_HOME/plugins/`` for currently installed user plugins. grandfathered: List[str] = [] try: - from hermes_constants import get_hermes_home as _ghome - user_plugins_dir = _ghome() / "plugins" + user_plugins_dir = get_hermes_home() / "plugins" if user_plugins_dir.is_dir(): for child in sorted(user_plugins_dir.iterdir()): if not child.is_dir(): diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 9fa8f0e52b..e9c41f7178 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -4319,8 +4319,6 @@ def _gateway_prompt(prompt_text: str, default: str = "", timeout: float = 300.0) tmp.replace(prompt_path) # Poll for response - import time as _time - deadline = _time.monotonic() + timeout while _time.monotonic() < deadline: if response_path.exists(): @@ -5214,8 +5212,6 @@ def _install_hangup_protection(gateway_mode: bool = False): # (2) Mirror output to update.log and wrap stdio for broken-pipe # tolerance. Any failure here is non-fatal; we just skip the wrap. try: - from hermes_cli.config import get_hermes_home - logs_dir = get_hermes_home() / "logs" logs_dir.mkdir(parents=True, exist_ok=True) log_path = logs_dir / "update.log" @@ -5791,8 +5787,6 @@ def _cmd_update_impl(args, gateway_mode: bool): # Verify the service actually survived the # restart. systemctl restart returns 0 even # if the new process crashes immediately. - import time as _time - _time.sleep(3) verify = subprocess.run( scope_cmd + ["is-active", svc_name], @@ -7679,9 +7673,7 @@ Examples: ) cmd_info["setup_fn"](plugin_parser) except Exception as _exc: - import logging as _log - - _log.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc) + logging.getLogger(__name__).debug("Plugin CLI discovery failed: %s", _exc) # ========================================================================= # memory command @@ -8145,8 +8137,6 @@ Examples: # Launch hermes --resume by replacing the current process print(f"Resuming session: {selected_id}") - import shutil - hermes_bin = shutil.which("hermes") if hermes_bin: os.execvp(hermes_bin, ["hermes", "--resume", selected_id]) diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index 691126a4c6..a4883b056b 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -586,7 +586,6 @@ def get_gateway_eligible_tools( return [], [], [] if config is None: - from hermes_cli.config import load_config config = load_config() or {} # Quick provider check without the heavy get_nous_subscription_features call diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 3b2b4cab3c..fd28f51368 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -906,8 +906,7 @@ def resolve_runtime_provider( code="no_aws_credentials", ) # Read bedrock-specific config from config.yaml - from hermes_cli.config import load_config as _load_bedrock_config - _bedrock_cfg = _load_bedrock_config().get("bedrock", {}) + _bedrock_cfg = load_config().get("bedrock", {}) # Region priority: config.yaml bedrock.region → env var → us-east-1 region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region() auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain" diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 9fcc538c75..3c00fa4f0f 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -434,7 +434,6 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (Google Gemini)", True, None)) elif tts_provider == "neutts": try: - import importlib.util neutts_ok = importlib.util.find_spec("neutts") is not None except Exception: neutts_ok = False @@ -963,7 +962,6 @@ def _setup_tts_provider(config: dict): if selected == "neutts": # Check if already installed try: - import importlib.util already_installed = importlib.util.find_spec("neutts") is not None except Exception: already_installed = False diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 23a03b3bd2..cb1f393713 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -1186,7 +1186,6 @@ def _configure_simple_requirements(ts_key: str): if api_key and api_key.strip(): save_env_value("OPENAI_API_KEY", api_key.strip()) # Save vision base URL to config (not .env — only secrets go there) - from hermes_cli.config import load_config, save_config _cfg = load_config() _aux = _cfg.setdefault("auxiliary", {}).setdefault("vision", {}) _aux["base_url"] = base_url diff --git a/run_agent.py b/run_agent.py index e03e285c70..9c6e9d7b92 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1453,11 +1453,10 @@ class AIAgent: if _mp and _mp.is_available(): self._memory_manager.add_provider(_mp) if self._memory_manager.providers: - from hermes_constants import get_hermes_home as _ghh _init_kwargs = { "session_id": self.session_id, "platform": platform or "cli", - "hermes_home": str(_ghh()), + "hermes_home": str(get_hermes_home()), "agent_context": "primary", } # Thread session title for memory provider scoping @@ -2777,10 +2776,10 @@ class AIAgent: prompt = self._SKILL_REVIEW_PROMPT def _run_review(): - import contextlib, os as _os + import contextlib review_agent = None try: - with open(_os.devnull, "w") as _devnull, \ + with open(os.devnull, "w") as _devnull, \ contextlib.redirect_stdout(_devnull), \ contextlib.redirect_stderr(_devnull): review_agent = AIAgent( @@ -3852,14 +3851,12 @@ class AIAgent: # 2. Clean terminal sandbox environments try: - from tools.terminal_tool import cleanup_vm cleanup_vm(task_id) except Exception: pass # 3. Clean browser daemon sessions try: - from tools.browser_tool import cleanup_browser cleanup_browser(task_id) except Exception: pass @@ -7777,8 +7774,7 @@ class AIAgent: # the tool returns True on the next poll. if self._interrupt_requested: try: - from tools.interrupt import set_interrupt as _sif - _sif(True, _worker_tid) + _set_interrupt(True, _worker_tid) except Exception: pass # Set the activity callback on THIS worker thread so @@ -7809,8 +7805,7 @@ class AIAgent: with self._tool_worker_threads_lock: self._tool_worker_threads.discard(_worker_tid) try: - from tools.interrupt import set_interrupt as _sif - _sif(False, _worker_tid) + _set_interrupt(False, _worker_tid) except Exception: pass @@ -11864,7 +11859,7 @@ def main( # Handle tool listing if list_tools: - from model_tools import get_all_tool_names, get_toolset_for_tool, get_available_toolsets + from model_tools import get_all_tool_names, get_available_toolsets from toolsets import get_all_toolsets, get_toolset_info print("📋 Available Tools & Toolsets:") diff --git a/tools/browser_camofox.py b/tools/browser_camofox.py index 0efeb16e9b..e1233859ae 100644 --- a/tools/browser_camofox.py +++ b/tools/browser_camofox.py @@ -543,7 +543,6 @@ def camofox_vision(question: str, annotate: bool = False, ) try: - from hermes_cli.config import load_config _cfg = load_config() _vision_cfg = _cfg.get("auxiliary", {}).get("vision", {}) _vision_timeout = float(_vision_cfg.get("timeout", 120)) diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index cf1003d12b..13f17abe30 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -742,7 +742,7 @@ def check_image_generation_requirements() -> bool: try: if not check_fal_api_key(): return False - import fal_client # noqa: F401 — SDK presence check + fal_client # noqa: F401 — SDK presence check return True except ImportError: return False diff --git a/tools/send_message_tool.py b/tools/send_message_tool.py index dacc7e17ab..19da4f55af 100644 --- a/tools/send_message_tool.py +++ b/tools/send_message_tool.py @@ -226,7 +226,6 @@ def _handle_send(args): # Weixin can be configured purely via .env; synthesize a pconfig so # send_message and cron delivery work without a gateway.yaml entry. if platform_name == "weixin": - import os wx_token = os.getenv("WEIXIN_TOKEN", "").strip() wx_account = os.getenv("WEIXIN_ACCOUNT_ID", "").strip() if wx_token and wx_account: @@ -254,7 +253,6 @@ def _handle_send(args): if not chat_id: home = config.get_home_channel(platform) if not home and platform_name == "weixin": - import os wx_home = os.getenv("WEIXIN_HOME_CHANNEL", "").strip() if wx_home: from gateway.config import HomeChannel diff --git a/tools/skills_tool.py b/tools/skills_tool.py index f5ab9eeacd..dcd1f8c5d1 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -975,8 +975,7 @@ def skill_view(name: str, file_path: str = None, task_id: str = None) -> str: _warnings.append(f"skill file is outside the trusted skills directory (~/.hermes/skills/): {skill_md}") if _injection_detected: _warnings.append("skill content contains patterns that may indicate prompt injection") - import logging as _logging - _logging.getLogger(__name__).warning("Skill security warning for '%s': %s", name, "; ".join(_warnings)) + logging.getLogger(__name__).warning("Skill security warning for '%s': %s", name, "; ".join(_warnings)) parsed_frontmatter: Dict[str, Any] = {} try: diff --git a/tui_gateway/server.py b/tui_gateway/server.py index 6a20b612a3..7798817803 100644 --- a/tui_gateway/server.py +++ b/tui_gateway/server.py @@ -1608,7 +1608,6 @@ def _(rid, params: dict) -> dict: if err: return err try: - from datetime import datetime from hermes_cli.clipboard import has_clipboard_image, save_clipboard_image except Exception as e: return _err(rid, 5027, f"clipboard unavailable: {e}") @@ -2687,7 +2686,6 @@ def _(rid, params: dict) -> dict: def _(rid, params: dict) -> dict: days = params.get("days", 30) try: - import time cutoff = time.time() - days * 86400 rows = [s for s in _get_db().list_sessions_rich(limit=500) if (s.get("started_at") or 0) >= cutoff] return _ok(rid, {"days": days, "sessions": len(rows), "messages": sum(s.get("message_count", 0) for s in rows)}) From c312e8ecf537778ab5796a9fe1869b668efefc69 Mon Sep 17 00:00:00 2001 From: alt-glitch Date: Tue, 21 Apr 2026 00:34:15 -0700 Subject: [PATCH 12/63] fix(update): keep get_hermes_home late-bound in _install_hangup_protection Follow-up to the redundant-imports sweep. _install_hangup_protection used to import get_hermes_home locally; the sweep hoisted it to the module-level binding already present at line 164. test_non_fatal_if_log_setup_fails monkeypatches hermes_cli.config.get_hermes_home to raise, which only works when the function late-binds its lookup. The hoisted version captures the reference at import time and bypasses the monkeypatch. Restore the local import (with a distinct local alias) so the test seam works and the stdio-untouched-on-setup-failure invariant is actually exercised. --- hermes_cli/main.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/hermes_cli/main.py b/hermes_cli/main.py index e9c41f7178..1da3fcbbe8 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -5212,7 +5212,11 @@ def _install_hangup_protection(gateway_mode: bool = False): # (2) Mirror output to update.log and wrap stdio for broken-pipe # tolerance. Any failure here is non-fatal; we just skip the wrap. try: - logs_dir = get_hermes_home() / "logs" + # Late-bound import so tests can monkeypatch + # hermes_cli.config.get_hermes_home to simulate setup failure. + from hermes_cli.config import get_hermes_home as _get_hermes_home + + logs_dir = _get_hermes_home() / "logs" logs_dir.mkdir(parents=True, exist_ok=True) log_path = logs_dir / "update.log" log_file = open(log_path, "a", buffering=1, encoding="utf-8") From c5a814b23337319a0e77c69b94b3f4d784fb3cc2 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 00:52:25 -0700 Subject: [PATCH 13/63] feat(maps): add guest_house, camp_site, and dual-key bakery lookup (#13398) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Small follow-up inspired by stale PR #2421 (@poojandpatel). - bakery now searches both shop=bakery AND amenity=bakery in one Overpass query so indie bakeries tagged either way are returned. Reproduces #2421's Lawrenceville, NJ test case (The Gingered Peach, WildFlour Bakery). - Adds tourism=guest_house and tourism=camp_site as first-class categories. - CATEGORY_TAGS entries can now be a list of (key, value) tuples; new _tags_for() normaliser + tag_pairs= kwarg on build_overpass_nearby/bbox union the results in one query. Old single-tuple call sites unchanged (back-compat preserved). - SKILL.md: 44 → 46 categories, list updated. --- skills/productivity/maps/SKILL.md | 15 ++-- .../productivity/maps/scripts/maps_client.py | 82 ++++++++++++++----- 2 files changed, 71 insertions(+), 26 deletions(-) diff --git a/skills/productivity/maps/SKILL.md b/skills/productivity/maps/SKILL.md index 9eded20866..d93692a4a6 100644 --- a/skills/productivity/maps/SKILL.md +++ b/skills/productivity/maps/SKILL.md @@ -2,7 +2,7 @@ name: maps description: > Location intelligence — geocode a place, reverse-geocode coordinates, - find nearby places (44 POI categories), driving/walking/cycling + find nearby places (46 POI categories), driving/walking/cycling distance + time, turn-by-turn directions, timezone lookup, bounding box + area for a named place, and POI search within a rectangle. Uses OpenStreetMap + Overpass + OSRM. Free, no API key. @@ -83,12 +83,13 @@ python3 $MAPS nearby --near "90210" --category pharmacy python3 $MAPS nearby --near "downtown austin" --category restaurant --category bar --limit 10 ``` -44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket, -atm, gas_station, parking, museum, park, school, university, bank, police, -fire_station, library, airport, train_station, bus_stop, church, mosque, -synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office, -convenience_store, bakery, bookshop, laundry, car_wash, car_rental, -bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub. +46 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, guest_house, +camp_site, supermarket, atm, gas_station, parking, museum, park, school, +university, bank, police, fire_station, library, airport, train_station, +bus_stop, church, mosque, synagogue, dentist, doctor, cinema, theatre, gym, +swimming_pool, post_office, convenience_store, bakery, bookshop, laundry, +car_wash, car_rental, bicycle_rental, taxi, veterinary, zoo, playground, +stadium, nightclub. Each result includes: `name`, `address`, `lat`/`lon`, `distance_m`, `maps_url` (clickable Google Maps link), `directions_url` (Google Maps diff --git a/skills/productivity/maps/scripts/maps_client.py b/skills/productivity/maps/scripts/maps_client.py index db0de82d6d..06d775e824 100644 --- a/skills/productivity/maps/scripts/maps_client.py +++ b/skills/productivity/maps/scripts/maps_client.py @@ -58,7 +58,9 @@ CATEGORY_TAGS = { "restaurant": ("amenity", "restaurant"), "cafe": ("amenity", "cafe"), "bar": ("amenity", "bar"), - "bakery": ("shop", "bakery"), + # bakery is tagged as shop=bakery in the OSM wiki, but some mappers use + # amenity=bakery. Search both so small indie bakeries aren't missed. + "bakery": [("shop", "bakery"), ("amenity", "bakery")], "convenience_store": ("shop", "convenience"), # Health "hospital": ("amenity", "hospital"), @@ -68,6 +70,8 @@ CATEGORY_TAGS = { "veterinary": ("amenity", "veterinary"), # Accommodation "hotel": ("tourism", "hotel"), + "guest_house": ("tourism", "guest_house"), + "camp_site": ("tourism", "camp_site"), # Shopping & Services "supermarket": ("shop", "supermarket"), "bookshop": ("shop", "books"), @@ -120,6 +124,19 @@ RELIGION_FILTER = { VALID_CATEGORIES = sorted(CATEGORY_TAGS.keys()) + +def _tags_for(category): + """Return the CATEGORY_TAGS entry as a list of (key, value) pairs. + + Most categories map to a single (tag_key, tag_val) tuple, but some + (e.g. ``bakery``) are tagged under more than one OSM key and are + represented as a list of tuples. Normalise both forms to a list. + """ + entry = CATEGORY_TAGS[category] + if isinstance(entry, list): + return list(entry) + return [entry] + OSRM_PROFILES = { "driving": "driving", "walking": "foot", @@ -338,36 +355,63 @@ def geocode_single(query): # --------------------------------------------------------------------------- def build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit, - religion=None): - """Build an Overpass QL query for nearby POIs around a point.""" + religion=None, tag_pairs=None): + """Build an Overpass QL query for nearby POIs around a point. + + If ``tag_pairs`` is provided, the query unions across every + ``(key, value)`` pair (used for categories like ``bakery`` that are + tagged under more than one OSM key). Otherwise falls back to the + single ``tag_key``/``tag_val`` pair for back-compat. + """ + pairs = tag_pairs if tag_pairs else [(tag_key, tag_val)] religion_filter = "" if religion: religion_filter = f'["religion"="{religion}"]' + body_lines = [] + for k, v in pairs: + body_lines.append( + f' node["{k}"="{v}"]{religion_filter}' + f'(around:{radius},{lat},{lon});' + ) + body_lines.append( + f' way["{k}"="{v}"]{religion_filter}' + f'(around:{radius},{lat},{lon});' + ) + body = "\n".join(body_lines) return ( f'[out:json][timeout:25];\n' f'(\n' - f' node["{tag_key}"="{tag_val}"]{religion_filter}' - f'(around:{radius},{lat},{lon});\n' - f' way["{tag_key}"="{tag_val}"]{religion_filter}' - f'(around:{radius},{lat},{lon});\n' + f'{body}\n' f');\n' f'out center {limit};\n' ) def build_overpass_bbox(tag_key, tag_val, south, west, north, east, limit, - religion=None): - """Build an Overpass QL query for POIs within a bounding box.""" + religion=None, tag_pairs=None): + """Build an Overpass QL query for POIs within a bounding box. + + See ``build_overpass_nearby`` for ``tag_pairs`` semantics. + """ + pairs = tag_pairs if tag_pairs else [(tag_key, tag_val)] religion_filter = "" if religion: religion_filter = f'["religion"="{religion}"]' + body_lines = [] + for k, v in pairs: + body_lines.append( + f' node["{k}"="{v}"]{religion_filter}' + f'({south},{west},{north},{east});' + ) + body_lines.append( + f' way["{k}"="{v}"]{religion_filter}' + f'({south},{west},{north},{east});' + ) + body = "\n".join(body_lines) return ( f'[out:json][timeout:25];\n' f'(\n' - f' node["{tag_key}"="{tag_val}"]{religion_filter}' - f'({south},{west},{north},{east});\n' - f' way["{tag_key}"="{tag_val}"]{religion_filter}' - f'({south},{west},{north},{east});\n' + f'{body}\n' f');\n' f'out center {limit};\n' ) @@ -605,10 +649,10 @@ def cmd_nearby(args): # appear twice. merged = {} for category in categories: - tag_key, tag_val = CATEGORY_TAGS[category] + tag_pairs = _tags_for(category) religion = RELIGION_FILTER.get(category) - query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit, - religion=religion) + query = build_overpass_nearby(None, None, lat, lon, radius, limit, + religion=religion, tag_pairs=tag_pairs) raw = overpass_query(query) elements = raw.get("elements", []) for place in parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon): @@ -945,10 +989,10 @@ def cmd_bbox(args): if limit <= 0: error_exit("Limit must be a positive integer.") - tag_key, tag_val = CATEGORY_TAGS[category] + tag_pairs = _tags_for(category) religion = RELIGION_FILTER.get(category) - query = build_overpass_bbox(tag_key, tag_val, south, west, north, east, - limit, religion=religion) + query = build_overpass_bbox(None, None, south, west, north, east, + limit, religion=religion, tag_pairs=tag_pairs) raw = overpass_query(query) From 04f9ffb792da7e2234727b69e6171a1e90da47bc Mon Sep 17 00:00:00 2001 From: Junass1 Date: Tue, 21 Apr 2026 00:37:26 -0700 Subject: [PATCH 14/63] fix(gateway): preserve sender attribution in shared group sessions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Generalize shared multi-user session handling so non-thread group sessions (group_sessions_per_user=False) get the same treatment as shared threads: inbound messages are prefixed with [sender name], and the session prompt shows a multi-user note instead of pinning a single **User:** line into the cached system prompt. Before: build_session_key already treated these as shared sessions, but _prepare_inbound_message_text and build_session_context_prompt only recognized shared threads — creating cross-user attribution drift and prompt-cache contamination in shared groups. - Add is_shared_multi_user_session() helper alongside build_session_key() so both the session key and the multi-user branches are driven by the same rules (DMs never shared, threads shared unless thread_sessions_per_user, groups shared unless group_sessions_per_user). - Add shared_multi_user_session field to SessionContext, populated by build_session_context() from config. - Use context.shared_multi_user_session in the prompt builder (label is 'Multi-user thread' when a thread is present, 'Multi-user session' otherwise). - Use the helper in _prepare_inbound_message_text so non-thread shared groups also get [sender] prefixes. Default behavior unchanged: DMs stay single-user, groups with group_sessions_per_user=True still show the user normally, shared threads keep their existing multi-user behavior. Tests (65 passed): - tests/gateway/test_session.py: new shared non-thread group prompt case. - tests/gateway/test_shared_group_sender_prefix.py: inbound preprocessing for shared non-thread groups and default groups. --- gateway/run.py | 11 +-- gateway/session.py | 48 ++++++++++--- tests/gateway/test_session.py | 22 ++++++ .../test_shared_group_sender_prefix.py | 70 +++++++++++++++++++ 4 files changed, 135 insertions(+), 16 deletions(-) create mode 100644 tests/gateway/test_shared_group_sender_prefix.py diff --git a/gateway/run.py b/gateway/run.py index 785368cffe..0343790b04 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -279,6 +279,7 @@ from gateway.session import ( build_session_context, build_session_context_prompt, build_session_key, + is_shared_multi_user_session, ) from gateway.delivery import DeliveryRouter from gateway.platforms.base import ( @@ -3789,12 +3790,12 @@ class GatewayRunner: history = history or [] message_text = event.text or "" - _is_shared_thread = ( - source.chat_type != "dm" - and source.thread_id - and not getattr(self.config, "thread_sessions_per_user", False) + _is_shared_multi_user = is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False), ) - if _is_shared_thread and source.user_name: + if _is_shared_multi_user and source.user_name: message_text = f"[{source.user_name}] {message_text}" if event.media_urls: diff --git a/gateway/session.py b/gateway/session.py index 81278e8521..7fc83b0811 100644 --- a/gateway/session.py +++ b/gateway/session.py @@ -152,6 +152,7 @@ class SessionContext: source: SessionSource connected_platforms: List[Platform] home_channels: Dict[Platform, HomeChannel] + shared_multi_user_session: bool = False # Session metadata session_key: str = "" @@ -166,6 +167,7 @@ class SessionContext: "home_channels": { p.value: hc.to_dict() for p, hc in self.home_channels.items() }, + "shared_multi_user_session": self.shared_multi_user_session, "session_key": self.session_key, "session_id": self.session_id, "created_at": self.created_at.isoformat() if self.created_at else None, @@ -240,18 +242,16 @@ def build_session_context_prompt( lines.append(f"**Channel Topic:** {context.source.chat_topic}") # User identity. - # In shared thread sessions (non-DM with thread_id), multiple users - # contribute to the same conversation. Don't pin a single user name - # in the system prompt — it changes per-turn and would bust the prompt - # cache. Instead, note that this is a multi-user thread; individual - # sender names are prefixed on each user message by the gateway. - _is_shared_thread = ( - context.source.chat_type != "dm" - and context.source.thread_id - ) - if _is_shared_thread: + # In shared multi-user sessions (shared threads OR shared non-thread groups + # when group_sessions_per_user=False), multiple users contribute to the same + # conversation. Don't pin a single user name in the system prompt — it + # changes per-turn and would bust the prompt cache. Instead, note that + # this is a multi-user session; individual sender names are prefixed on + # each user message by the gateway. + if context.shared_multi_user_session: + session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session" lines.append( - "**Session type:** Multi-user thread — messages are prefixed " + f"**Session type:** {session_label} — messages are prefixed " "with [sender name]. Multiple users may participate." ) elif context.source.user_name: @@ -467,6 +467,27 @@ class SessionEntry: ) +def is_shared_multi_user_session( + source: SessionSource, + *, + group_sessions_per_user: bool = True, + thread_sessions_per_user: bool = False, +) -> bool: + """Return True when a non-DM session is shared across participants. + + Mirrors the isolation rules in :func:`build_session_key`: + - DMs are never shared. + - Threads are shared unless ``thread_sessions_per_user`` is True. + - Non-thread group/channel sessions are shared unless + ``group_sessions_per_user`` is True (default: True = isolated). + """ + if source.chat_type == "dm": + return False + if source.thread_id: + return not thread_sessions_per_user + return not group_sessions_per_user + + def build_session_key( source: SessionSource, group_sessions_per_user: bool = True, @@ -1238,6 +1259,11 @@ def build_session_context( source=source, connected_platforms=connected, home_channels=home_channels, + shared_multi_user_session=is_shared_multi_user_session( + source, + group_sessions_per_user=getattr(config, "group_sessions_per_user", True), + thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False), + ), ) if session_entry: diff --git a/tests/gateway/test_session.py b/tests/gateway/test_session.py index 39e4aad3d6..bf1eba51df 100644 --- a/tests/gateway/test_session.py +++ b/tests/gateway/test_session.py @@ -356,6 +356,28 @@ class TestBuildSessionContextPrompt: assert "**User:** Alice" in prompt assert "Multi-user thread" not in prompt + def test_shared_non_thread_group_prompt_hides_single_user(self): + """Shared non-thread group sessions should avoid pinning one user.""" + config = GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"), + }, + group_sessions_per_user=False, + ) + source = SessionSource( + platform=Platform.TELEGRAM, + chat_id="-1002285219667", + chat_name="Test Group", + chat_type="group", + user_name="Alice", + ) + ctx = build_session_context(source, config) + prompt = build_session_context_prompt(ctx) + + assert "Multi-user session" in prompt + assert "[sender name]" in prompt + assert "**User:** Alice" not in prompt + def test_dm_thread_shows_user_not_multi(self): """DM threads are single-user and should show User, not multi-user note.""" config = GatewayConfig( diff --git a/tests/gateway/test_shared_group_sender_prefix.py b/tests/gateway/test_shared_group_sender_prefix.py new file mode 100644 index 0000000000..9f0e525f64 --- /dev/null +++ b/tests/gateway/test_shared_group_sender_prefix.py @@ -0,0 +1,70 @@ +import pytest + +from gateway.config import GatewayConfig, Platform, PlatformConfig +from gateway.platforms.base import MessageEvent +from gateway.run import GatewayRunner +from gateway.session import SessionSource + + +def _make_runner(config: GatewayConfig) -> GatewayRunner: + runner = object.__new__(GatewayRunner) + runner.config = config + runner.adapters = {} + runner._model = "openai/gpt-4.1-mini" + runner._base_url = None + return runner + + +@pytest.mark.asyncio +async def test_preprocess_prefixes_sender_for_shared_non_thread_group_session(): + runner = _make_runner( + GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"), + }, + group_sessions_per_user=False, + ) + ) + source = SessionSource( + platform=Platform.TELEGRAM, + chat_id="-1002285219667", + chat_name="Test Group", + chat_type="group", + user_name="Alice", + ) + event = MessageEvent(text="hello", source=source) + + result = await runner._prepare_inbound_message_text( + event=event, + source=source, + history=[], + ) + + assert result == "[Alice] hello" + + +@pytest.mark.asyncio +async def test_preprocess_keeps_plain_text_for_default_group_sessions(): + runner = _make_runner( + GatewayConfig( + platforms={ + Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"), + }, + ) + ) + source = SessionSource( + platform=Platform.TELEGRAM, + chat_id="-1002285219667", + chat_name="Test Group", + chat_type="group", + user_name="Alice", + ) + event = MessageEvent(text="hello", source=source) + + result = await runner._prepare_inbound_message_text( + event=event, + source=source, + history=[], + ) + + assert result == "hello" From 731f4fbae677dfd1e6bce49c53c456ae38f709b5 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:05:18 +0530 Subject: [PATCH 15/63] feat: add transport ABC + AnthropicTransport wired to all paths MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ProviderTransport ABC (4 abstract methods: convert_messages, convert_tools, build_kwargs, normalize_response) plus optional hooks (validate_response, extract_cache_stats, map_finish_reason). Add transport registry with lazy discovery — get_transport() auto-imports transport modules on first call. Add AnthropicTransport — delegates to existing anthropic_adapter.py functions, wired to ALL Anthropic code paths in run_agent.py: - Main normalize loop (L10775) - Main build_kwargs (L6673) - Response validation (L9366) - Finish reason mapping (L9534) - Cache stats extraction (L9827) - Truncation normalize (L9565) - Memory flush build_kwargs + normalize (L7363, L7395) - Iteration-limit summary + retry (L8465, L8498) Zero direct adapter imports remain for transport methods. Client lifecycle, streaming, auth, and credential management stay on AIAgent. 20 new tests (ABC contract, registry, AnthropicTransport methods). 359 anthropic-related tests pass (0 failures). PR 3 of the provider transport refactor. --- agent/transports/__init__.py | 40 ++++- agent/transports/anthropic.py | 129 +++++++++++++ agent/transports/base.py | 89 +++++++++ run_agent.py | 106 ++++++----- tests/agent/transports/test_transport.py | 220 +++++++++++++++++++++++ 5 files changed, 539 insertions(+), 45 deletions(-) create mode 100644 agent/transports/anthropic.py create mode 100644 agent/transports/base.py create mode 100644 tests/agent/transports/test_transport.py diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py index 6ee1c51174..6cd3a277a1 100644 --- a/agent/transports/__init__.py +++ b/agent/transports/__init__.py @@ -1 +1,39 @@ -"""Transport layer types for provider response normalization.""" +"""Transport layer types and registry for provider response normalization. + +Usage: + from agent.transports import get_transport + transport = get_transport("anthropic_messages") + result = transport.normalize_response(raw_response) +""" + +from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401 + +_REGISTRY: dict = {} + + +def register_transport(api_mode: str, transport_cls: type) -> None: + """Register a transport class for an api_mode string.""" + _REGISTRY[api_mode] = transport_cls + + +def get_transport(api_mode: str): + """Get a transport instance for the given api_mode. + + Returns None if no transport is registered for this api_mode. + This allows gradual migration — call sites can check for None + and fall back to the legacy code path. + """ + if not _REGISTRY: + _discover_transports() + cls = _REGISTRY.get(api_mode) + if cls is None: + return None + return cls() + + +def _discover_transports() -> None: + """Import all transport modules to trigger auto-registration.""" + try: + import agent.transports.anthropic # noqa: F401 + except ImportError: + pass diff --git a/agent/transports/anthropic.py b/agent/transports/anthropic.py new file mode 100644 index 0000000000..7ffa71a6f9 --- /dev/null +++ b/agent/transports/anthropic.py @@ -0,0 +1,129 @@ +"""Anthropic Messages API transport. + +Delegates to the existing adapter functions in agent/anthropic_adapter.py. +This transport owns format conversion and normalization — NOT client lifecycle. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse + + +class AnthropicTransport(ProviderTransport): + """Transport for api_mode='anthropic_messages'. + + Wraps the existing functions in anthropic_adapter.py behind the + ProviderTransport ABC. Each method delegates — no logic is duplicated. + """ + + @property + def api_mode(self) -> str: + return "anthropic_messages" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Anthropic (system, messages) tuple. + + kwargs: + base_url: Optional[str] — affects thinking signature handling. + """ + from agent.anthropic_adapter import convert_messages_to_anthropic + + base_url = kwargs.get("base_url") + return convert_messages_to_anthropic(messages, base_url=base_url) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Anthropic input_schema format.""" + from agent.anthropic_adapter import convert_tools_to_anthropic + + return convert_tools_to_anthropic(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Anthropic messages.create() kwargs. + + Calls convert_messages and convert_tools internally. + + params (all optional): + max_tokens: int + reasoning_config: dict | None + tool_choice: str | None + is_oauth: bool + preserve_dots: bool + context_length: int | None + base_url: str | None + fast_mode: bool + """ + from agent.anthropic_adapter import build_anthropic_kwargs + + return build_anthropic_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 16384), + reasoning_config=params.get("reasoning_config"), + tool_choice=params.get("tool_choice"), + is_oauth=params.get("is_oauth", False), + preserve_dots=params.get("preserve_dots", False), + context_length=params.get("context_length"), + base_url=params.get("base_url"), + fast_mode=params.get("fast_mode", False), + ) + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Anthropic response to NormalizedResponse. + + kwargs: + strip_tool_prefix: bool — strip 'mcp_mcp_' prefixes from tool names. + """ + from agent.anthropic_adapter import normalize_anthropic_response_v2 + + strip_tool_prefix = kwargs.get("strip_tool_prefix", False) + return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix) + + def validate_response(self, response: Any) -> bool: + """Check Anthropic response structure is valid.""" + if response is None: + return False + content_blocks = getattr(response, "content", None) + if not isinstance(content_blocks, list): + return False + if not content_blocks: + return False + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Extract Anthropic cache_read and cache_creation token counts.""" + usage = getattr(response, "usage", None) + if usage is None: + return None + cached = getattr(usage, "cache_read_input_tokens", 0) or 0 + written = getattr(usage, "cache_creation_input_tokens", 0) or 0 + if cached or written: + return {"cached_tokens": cached, "creation_tokens": written} + return None + + # Promote the adapter's canonical mapping to module level so it's shared + _STOP_REASON_MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "refusal": "content_filter", + "model_context_window_exceeded": "length", + } + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Anthropic stop_reason to OpenAI finish_reason.""" + return self._STOP_REASON_MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("anthropic_messages", AnthropicTransport) diff --git a/agent/transports/base.py b/agent/transports/base.py new file mode 100644 index 0000000000..b516967b6a --- /dev/null +++ b/agent/transports/base.py @@ -0,0 +1,89 @@ +"""Abstract base for provider transports. + +A transport owns the data path for one api_mode: + convert_messages → convert_tools → build_kwargs → normalize_response + +It does NOT own: client construction, streaming, credential refresh, +prompt caching, interrupt handling, or retry logic. Those stay on AIAgent. +""" + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional + +from agent.transports.types import NormalizedResponse + + +class ProviderTransport(ABC): + """Base class for provider-specific format conversion and normalization.""" + + @property + @abstractmethod + def api_mode(self) -> str: + """The api_mode string this transport handles (e.g. 'anthropic_messages').""" + ... + + @abstractmethod + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI-format messages to provider-native format. + + Returns provider-specific structure (e.g. (system, messages) for Anthropic, + or the messages list unchanged for chat_completions). + """ + ... + + @abstractmethod + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI-format tool definitions to provider-native format. + + Returns provider-specific tool list (e.g. Anthropic input_schema format). + """ + ... + + @abstractmethod + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build the complete API call kwargs dict. + + This is the primary entry point — it typically calls convert_messages() + and convert_tools() internally, then adds model-specific config. + + Returns a dict ready to be passed to the provider's SDK client. + """ + ... + + @abstractmethod + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize a raw provider response to the shared NormalizedResponse type. + + This is the only method that returns a transport-layer type. + """ + ... + + def validate_response(self, response: Any) -> bool: + """Optional: check if the raw response is structurally valid. + + Returns True if valid, False if the response should be treated as invalid. + Default implementation always returns True. + """ + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Optional: extract provider-specific cache hit/creation stats. + + Returns dict with 'cached_tokens' and 'creation_tokens', or None. + Default returns None. + """ + return None + + def map_finish_reason(self, raw_reason: str) -> str: + """Optional: map provider-specific stop reason to OpenAI equivalent. + + Default returns the raw reason unchanged. Override for providers + with different stop reason vocabularies. + """ + return raw_reason diff --git a/run_agent.py b/run_agent.py index 9c6e9d7b92..722f7cea4b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6545,6 +6545,15 @@ class AIAgent: return suffix return "[A multimodal message was converted to text for Anthropic compatibility.]" + def _get_anthropic_transport(self): + """Return the cached AnthropicTransport instance (lazy singleton).""" + t = getattr(self, "_anthropic_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("anthropic_messages") + self._anthropic_transport = t + return t + def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list: if not any( isinstance(msg, dict) and self._content_has_image_parts(msg.get("content")) @@ -6661,20 +6670,14 @@ class AIAgent: def _build_api_kwargs(self, api_messages: list) -> dict: """Build the keyword arguments dict for the active API mode.""" if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs + _transport = self._get_anthropic_transport() anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages) - # Pass context_length (total input+output window) so the adapter can - # clamp max_tokens (output cap) when the user configured a smaller - # context window than the model's native output limit. ctx_len = getattr(self, "context_compressor", None) ctx_len = ctx_len.context_length if ctx_len else None - # _ephemeral_max_output_tokens is set for one call when the API - # returns "max_tokens too large given prompt" — it caps output to - # the available window space without touching context_length. ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) if ephemeral_out is not None: self._ephemeral_max_output_tokens = None # consume immediately - return build_anthropic_kwargs( + return _transport.build_kwargs( model=self.model, messages=anthropic_messages, tools=self.tools, @@ -7356,9 +7359,9 @@ class AIAgent: codex_kwargs["max_output_tokens"] = 5120 response = self._run_codex_stream(codex_kwargs) elif not _aux_available and self.api_mode == "anthropic_messages": - # Native Anthropic — use the Anthropic client directly - from agent.anthropic_adapter import build_anthropic_kwargs as _build_ant_kwargs - ant_kwargs = _build_ant_kwargs( + # Native Anthropic — use the transport for kwargs + _tflush = self._get_anthropic_transport() + ant_kwargs = _tflush.build_kwargs( model=self.model, messages=api_messages, tools=[memory_tool_def], max_tokens=5120, reasoning_config=None, @@ -7386,10 +7389,15 @@ class AIAgent: if assistant_msg and assistant_msg.tool_calls: tool_calls = assistant_msg.tool_calls elif self.api_mode == "anthropic_messages" and not _aux_available: - from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush - _flush_msg, _ = _nar_flush(response, strip_tool_prefix=self._is_anthropic_oauth) - if _flush_msg and _flush_msg.tool_calls: - tool_calls = _flush_msg.tool_calls + _tfn = self._get_anthropic_transport() + _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth) + if _flush_nr and _flush_nr.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in _flush_nr.tool_calls + ] elif hasattr(response, "choices") and response.choices: assistant_message = response.choices[0].message if assistant_message.tool_calls: @@ -8449,14 +8457,14 @@ class AIAgent: summary_kwargs["extra_body"] = summary_extra_body if self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar - _ant_kw = _bak(model=self.model, messages=api_messages, tools=None, + _tsum = self._get_anthropic_transport() + _ant_kw = _tsum.build_kwargs(model=self.model, messages=api_messages, tools=None, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, is_oauth=self._is_anthropic_oauth, preserve_dots=self._anthropic_preserve_dots()) summary_response = self._anthropic_messages_create(_ant_kw) - _msg, _ = _nar(summary_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_msg.content or "").strip() + _sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_sum_nr.content or "").strip() else: summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs) @@ -8481,14 +8489,14 @@ class AIAgent: retry_msg, _ = self._normalize_codex_response(retry_response) final_response = (retry_msg.content or "").strip() if retry_msg else "" elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2 - _ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None, + _tretry = self._get_anthropic_transport() + _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None, is_oauth=self._is_anthropic_oauth, max_tokens=self.max_tokens, reasoning_config=self.reasoning_config, preserve_dots=self._anthropic_preserve_dots()) retry_response = self._anthropic_messages_create(_ant_kw2) - _retry_msg, _ = _nar2(retry_response, strip_tool_prefix=self._is_anthropic_oauth) - final_response = (_retry_msg.content or "").strip() + _retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth) + final_response = (_retry_nr.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -9357,16 +9365,13 @@ class AIAgent: response_invalid = True error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": - content_blocks = getattr(response, "content", None) if response is not None else None - if response is None: + _tv = self._get_anthropic_transport() + if not _tv.validate_response(response): response_invalid = True - error_details.append("response is None") - elif not isinstance(content_blocks, list): - response_invalid = True - error_details.append("response.content is not a list") - elif not content_blocks: - response_invalid = True - error_details.append("response.content is empty") + if response is None: + error_details.append("response is None") + else: + error_details.append("response.content invalid (not a non-empty list)") else: if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices: response_invalid = True @@ -9527,8 +9532,8 @@ class AIAgent: else: finish_reason = "stop" elif self.api_mode == "anthropic_messages": - stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"} - finish_reason = stop_reason_map.get(response.stop_reason, "stop") + _tfr = self._get_anthropic_transport() + finish_reason = _tfr.map_finish_reason(response.stop_reason) else: finish_reason = response.choices[0].finish_reason assistant_message = response.choices[0].message @@ -9557,10 +9562,24 @@ class AIAgent: if self.api_mode in ("chat_completions", "bedrock_converse"): _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import normalize_anthropic_response - _trunc_msg, _ = normalize_anthropic_response( + _trunc_nr = self._get_anthropic_transport().normalize_response( response, strip_tool_prefix=self._is_anthropic_oauth ) + _trunc_msg = SimpleNamespace( + content=_trunc_nr.content, + tool_calls=[ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in (_trunc_nr.tool_calls or []) + ] or None, + reasoning=_trunc_nr.reasoning, + reasoning_content=None, + reasoning_details=( + _trunc_nr.provider_data.get("reasoning_details") + if _trunc_nr.provider_data else None + ), + ) _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False @@ -9819,9 +9838,10 @@ class AIAgent: # Log cache hit stats when prompt caching is active if self._use_prompt_caching: if self.api_mode == "anthropic_messages": - # Anthropic uses cache_read_input_tokens / cache_creation_input_tokens - cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0 - written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0 + _tcs = self._get_anthropic_transport() + _cache = _tcs.extract_cache_stats(response) + cached = _cache["cached_tokens"] if _cache else 0 + written = _cache["creation_tokens"] if _cache else 0 else: # OpenRouter uses prompt_tokens_details.cached_tokens details = getattr(response.usage, 'prompt_tokens_details', None) @@ -10766,15 +10786,13 @@ class AIAgent: if self.api_mode == "codex_responses": assistant_message, finish_reason = self._normalize_codex_response(response) elif self.api_mode == "anthropic_messages": - from agent.anthropic_adapter import normalize_anthropic_response_v2 - _nr = normalize_anthropic_response_v2( + _transport = self._get_anthropic_transport() + _nr = _transport.normalize_response( response, strip_tool_prefix=self._is_anthropic_oauth ) # Back-compat shim: downstream code expects SimpleNamespace with # .content, .tool_calls, .reasoning, .reasoning_content, - # .reasoning_details attributes. This shim makes the cost of the - # old interface visible — it vanishes when the full transport - # wiring lands (PR 3+). + # .reasoning_details attributes. assistant_message = SimpleNamespace( content=_nr.content, tool_calls=[ diff --git a/tests/agent/transports/test_transport.py b/tests/agent/transports/test_transport.py new file mode 100644 index 0000000000..b51336d962 --- /dev/null +++ b/tests/agent/transports/test_transport.py @@ -0,0 +1,220 @@ +"""Tests for the transport ABC, registry, and AnthropicTransport.""" + +import pytest +from types import SimpleNamespace +from unittest.mock import MagicMock + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage +from agent.transports import get_transport, register_transport, _REGISTRY + + +# ── ABC contract tests ────────────────────────────────────────────────── + +class TestProviderTransportABC: + """Verify the ABC contract is enforceable.""" + + def test_cannot_instantiate_abc(self): + with pytest.raises(TypeError): + ProviderTransport() + + def test_concrete_must_implement_all_abstract(self): + class Incomplete(ProviderTransport): + @property + def api_mode(self): + return "test" + with pytest.raises(TypeError): + Incomplete() + + def test_minimal_concrete(self): + class Minimal(ProviderTransport): + @property + def api_mode(self): + return "test_minimal" + def convert_messages(self, messages, **kw): + return messages + def convert_tools(self, tools): + return tools + def build_kwargs(self, model, messages, tools=None, **params): + return {"model": model, "messages": messages} + def normalize_response(self, response, **kw): + return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop") + + t = Minimal() + assert t.api_mode == "test_minimal" + assert t.validate_response(None) is True # default + assert t.extract_cache_stats(None) is None # default + assert t.map_finish_reason("end_turn") == "end_turn" # default passthrough + + +# ── Registry tests ─────────────────────────────────────────────────────── + +class TestTransportRegistry: + + def test_get_unregistered_returns_none(self): + assert get_transport("nonexistent_mode") is None + + def test_anthropic_registered_on_import(self): + import agent.transports.anthropic # noqa: F401 + t = get_transport("anthropic_messages") + assert t is not None + assert t.api_mode == "anthropic_messages" + + def test_register_and_get(self): + class DummyTransport(ProviderTransport): + @property + def api_mode(self): + return "dummy_test" + def convert_messages(self, messages, **kw): + return messages + def convert_tools(self, tools): + return tools + def build_kwargs(self, model, messages, tools=None, **params): + return {} + def normalize_response(self, response, **kw): + return NormalizedResponse(content=None, tool_calls=None, finish_reason="stop") + + register_transport("dummy_test", DummyTransport) + t = get_transport("dummy_test") + assert t.api_mode == "dummy_test" + # Cleanup + _REGISTRY.pop("dummy_test", None) + + +# ── AnthropicTransport tests ──────────────────────────────────────────── + +class TestAnthropicTransport: + + @pytest.fixture + def transport(self): + import agent.transports.anthropic # noqa: F401 + return get_transport("anthropic_messages") + + def test_api_mode(self, transport): + assert transport.api_mode == "anthropic_messages" + + def test_convert_tools_simple(self, transport): + tools = [{ + "type": "function", + "function": { + "name": "test_tool", + "description": "A test", + "parameters": {"type": "object", "properties": {}}, + } + }] + result = transport.convert_tools(tools) + assert len(result) == 1 + assert result[0]["name"] == "test_tool" + assert "input_schema" in result[0] + + def test_validate_response_none(self, transport): + assert transport.validate_response(None) is False + + def test_validate_response_empty_content(self, transport): + r = SimpleNamespace(content=[]) + assert transport.validate_response(r) is False + + def test_validate_response_valid(self, transport): + r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")]) + assert transport.validate_response(r) is True + + def test_map_finish_reason(self, transport): + assert transport.map_finish_reason("end_turn") == "stop" + assert transport.map_finish_reason("tool_use") == "tool_calls" + assert transport.map_finish_reason("max_tokens") == "length" + assert transport.map_finish_reason("stop_sequence") == "stop" + assert transport.map_finish_reason("refusal") == "content_filter" + assert transport.map_finish_reason("model_context_window_exceeded") == "length" + assert transport.map_finish_reason("unknown") == "stop" + + def test_extract_cache_stats_none_usage(self, transport): + r = SimpleNamespace(usage=None) + assert transport.extract_cache_stats(r) is None + + def test_extract_cache_stats_with_cache(self, transport): + usage = SimpleNamespace(cache_read_input_tokens=100, cache_creation_input_tokens=50) + r = SimpleNamespace(usage=usage) + result = transport.extract_cache_stats(r) + assert result == {"cached_tokens": 100, "creation_tokens": 50} + + def test_extract_cache_stats_zero(self, transport): + usage = SimpleNamespace(cache_read_input_tokens=0, cache_creation_input_tokens=0) + r = SimpleNamespace(usage=usage) + assert transport.extract_cache_stats(r) is None + + def test_normalize_response_text(self, transport): + """Test normalization of a simple text response.""" + r = SimpleNamespace( + content=[SimpleNamespace(type="text", text="Hello world")], + stop_reason="end_turn", + usage=SimpleNamespace(input_tokens=10, output_tokens=5), + model="claude-sonnet-4-6", + ) + nr = transport.normalize_response(r) + assert isinstance(nr, NormalizedResponse) + assert nr.content == "Hello world" + assert nr.tool_calls is None or nr.tool_calls == [] + assert nr.finish_reason == "stop" + + def test_normalize_response_tool_calls(self, transport): + """Test normalization of a tool-use response.""" + r = SimpleNamespace( + content=[ + SimpleNamespace( + type="tool_use", + id="toolu_123", + name="terminal", + input={"command": "ls"}, + ), + ], + stop_reason="tool_use", + usage=SimpleNamespace(input_tokens=10, output_tokens=20), + model="claude-sonnet-4-6", + ) + nr = transport.normalize_response(r) + assert nr.finish_reason == "tool_calls" + assert len(nr.tool_calls) == 1 + tc = nr.tool_calls[0] + assert tc.name == "terminal" + assert tc.id == "toolu_123" + assert '"command"' in tc.arguments + + def test_normalize_response_thinking(self, transport): + """Test normalization preserves thinking content.""" + r = SimpleNamespace( + content=[ + SimpleNamespace(type="thinking", thinking="Let me think..."), + SimpleNamespace(type="text", text="The answer is 42"), + ], + stop_reason="end_turn", + usage=SimpleNamespace(input_tokens=10, output_tokens=15), + model="claude-sonnet-4-6", + ) + nr = transport.normalize_response(r) + assert nr.content == "The answer is 42" + assert nr.reasoning == "Let me think..." + + def test_build_kwargs_returns_dict(self, transport): + """Test build_kwargs produces a usable kwargs dict.""" + messages = [{"role": "user", "content": "Hello"}] + kw = transport.build_kwargs( + model="claude-sonnet-4-6", + messages=messages, + max_tokens=1024, + ) + assert isinstance(kw, dict) + assert "model" in kw + assert "max_tokens" in kw + assert "messages" in kw + + def test_convert_messages_extracts_system(self, transport): + """Test convert_messages separates system from messages.""" + messages = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "Hi"}, + ] + system, msgs = transport.convert_messages(messages) + # System should be extracted + assert system is not None + # Messages should only have user + assert len(msgs) >= 1 From 1830ebfc521ce3b793af1689decc0b8a9259e078 Mon Sep 17 00:00:00 2001 From: AxDSan Date: Fri, 20 Mar 2026 00:38:28 +0000 Subject: [PATCH 16/63] feat: Add KittenTTS provider for local TTS synthesis Add support for KittenTTS - a lightweight, local TTS engine with models ranging from 25-80MB that runs on CPU without requiring a GPU or API key. Features: - Support for 8 built-in voices (Jasper, Bella, Luna, etc.) - Configurable model size (nano 25MB, micro 41MB, mini 80MB) - Adjustable speech speed - Model caching for performance - Automatic WAV to Opus conversion for Telegram voice messages Configuration example (config.yaml): tts: provider: kittentts kittentts: model: KittenML/kitten-tts-nano-0.8-int8 voice: Jasper speed: 1.0 clean_text: true Installation: pip install https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl --- tools/tts_tool.py | 87 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/tools/tts_tool.py b/tools/tts_tool.py index adc6524c46..fa5a8159cf 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -79,6 +79,12 @@ def _import_sounddevice(): return sd +def _import_kittentts(): + """Lazy import KittenTTS. Returns the class or raises ImportError.""" + from kittentts import KittenTTS + return KittenTTS + + # =========================================================================== # Defaults # =========================================================================== @@ -88,6 +94,8 @@ DEFAULT_ELEVENLABS_VOICE_ID = "pNInz6obpgDQGcFmaJgB" # Adam DEFAULT_ELEVENLABS_MODEL_ID = "eleven_multilingual_v2" DEFAULT_ELEVENLABS_STREAMING_MODEL_ID = "eleven_flash_v2_5" DEFAULT_OPENAI_MODEL = "gpt-4o-mini-tts" +DEFAULT_KITTENTTS_MODEL = "KittenML/kitten-tts-nano-0.8-int8" # 25MB +DEFAULT_KITTENTTS_VOICE = "Jasper" DEFAULT_OPENAI_VOICE = "alloy" DEFAULT_OPENAI_BASE_URL = "https://api.openai.com/v1" DEFAULT_MINIMAX_MODEL = "speech-2.8-hd" @@ -758,6 +766,69 @@ def _generate_neutts(text: str, output_path: str, tts_config: Dict[str, Any]) -> return output_path +# =========================================================================== +# Provider: KittenTTS (local, lightweight) +# =========================================================================== + +# Module-level cache for KittenTTS model instance +_kittentts_model_cache: Dict[str, Any] = {} + + +def _generate_kittentts(text: str, output_path: str, tts_config: Dict[str, Any]) -> str: + """Generate speech using KittenTTS local ONNX model. + + KittenTTS is a lightweight TTS engine (25-80MB models) that runs + entirely on CPU without requiring a GPU or API key. + + Args: + text: Text to convert to speech. + output_path: Where to save the audio file. + tts_config: TTS config dict. + + Returns: + Path to the saved audio file. + """ + KittenTTS = _import_kittentts() + kt_config = tts_config.get("kittentts", {}) + model_name = kt_config.get("model", DEFAULT_KITTENTTS_MODEL) + voice = kt_config.get("voice", DEFAULT_KITTENTTS_VOICE) + speed = kt_config.get("speed", 1.0) + clean_text = kt_config.get("clean_text", True) + + # Use cached model instance if available + global _kittentts_model_cache + if model_name not in _kittentts_model_cache: + logger.info("[KittenTTS] Loading model: %s", model_name) + _kittentts_model_cache[model_name] = KittenTTS(model_name) + logger.info("[KittenTTS] Model loaded successfully") + + model = _kittentts_model_cache[model_name] + + # Generate audio (returns numpy array at 24kHz) + audio = model.generate(text, voice=voice, speed=speed, clean_text=clean_text) + + # Save as WAV + import soundfile as sf + wav_path = output_path + if not output_path.endswith(".wav"): + wav_path = output_path.rsplit(".", 1)[0] + ".wav" + + sf.write(wav_path, audio, 24000) + + # Convert to desired format if needed + if wav_path != output_path: + ffmpeg = shutil.which("ffmpeg") + if ffmpeg: + conv_cmd = [ffmpeg, "-i", wav_path, "-y", "-loglevel", "error", output_path] + subprocess.run(conv_cmd, check=True, timeout=30) + os.remove(wav_path) + else: + # No ffmpeg — rename the WAV to the expected path + os.rename(wav_path, output_path) + + return output_path + + # =========================================================================== # Main tool function # =========================================================================== @@ -877,6 +948,18 @@ def text_to_speech_tool( logger.info("Generating speech with NeuTTS (local)...") _generate_neutts(text, file_str, tts_config) + elif provider == "kittentts": + try: + _import_kittentts() + except ImportError: + return json.dumps({ + "success": False, + "error": "KittenTTS provider selected but 'kittentts' package not installed. " + "Run: pip install https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl" + }, ensure_ascii=False) + logger.info("Generating speech with KittenTTS (local, ~25MB)...") + _generate_kittentts(text, file_str, tts_config) + else: # Default: Edge TTS (free), with NeuTTS as local fallback edge_available = True @@ -914,9 +997,9 @@ def text_to_speech_tool( }, ensure_ascii=False) # Try Opus conversion for Telegram compatibility - # Edge TTS outputs MP3, NeuTTS outputs WAV — both need ffmpeg conversion + # Edge TTS outputs MP3, NeuTTS/KittenTTS output WAV — all need ffmpeg conversion voice_compatible = False - if provider in ("edge", "neutts", "minimax", "xai") and not file_str.endswith(".ogg"): + if provider in ("edge", "neutts", "minimax", "xai", "kittentts") and not file_str.endswith(".ogg"): opus_path = _convert_to_opus(file_str) if opus_path: file_str = opus_path From 2d7ff9c5bd4c41079cd8d1dcd07c0d4c486e2fea Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 00:44:37 -0700 Subject: [PATCH 17/63] feat(tts): complete KittenTTS integration (tools/setup/docs/tests) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Builds on @AxDSan's PR #2109 to finish the KittenTTS wiring so the provider behaves like every other TTS backend end to end. - tools/tts_tool.py: `_check_kittentts_available()` helper and wire into `check_tts_requirements()`; extend Opus-conversion list to include kittentts (WAV → Opus for Telegram voice bubbles); point the missing-package error at `hermes setup tts`. - hermes_cli/tools_config.py: add KittenTTS entry to the "Text-to-Speech" toolset picker, with a `kittentts` post_setup hook that auto-installs the wheel + soundfile via pip. - hermes_cli/setup.py: `_install_kittentts_deps()`, new choice + install flow in `_setup_tts_provider()`, provider_labels entry, and status row in the `hermes setup` summary. - website/docs/user-guide/features/tts.md: add KittenTTS to the provider table, config example, ffmpeg note, and the zero-config voice-bubble tip. - tests/tools/test_tts_kittentts.py: 10 unit tests covering generation, model caching, config passthrough, ffmpeg conversion, availability detection, and the missing-package dispatcher branch. E2E verified against the real `kittentts` wheel: - WAV direct output (pcm_s16le, 24kHz mono) - MP3 conversion via ffmpeg (from WAV) - Telegram flow (provider in Opus-conversion list) produces `codec_name=opus`, 48kHz mono, `voice_compatible=True`, and the `[[audio_as_voice]]` marker - check_tts_requirements() returns True when kittentts is installed --- hermes_cli/setup.py | 62 +++++++- hermes_cli/tools_config.py | 38 +++++ tests/tools/test_tts_kittentts.py | 198 ++++++++++++++++++++++++ tools/tts_tool.py | 14 +- website/docs/user-guide/features/tts.md | 15 +- 5 files changed, 321 insertions(+), 6 deletions(-) create mode 100644 tests/tools/test_tts_kittentts.py diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 3c00fa4f0f..7eb25965ae 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -441,6 +441,16 @@ def _print_setup_summary(config: dict, hermes_home): tool_status.append(("Text-to-Speech (NeuTTS local)", True, None)) else: tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'")) + elif tts_provider == "kittentts": + try: + import importlib.util + kittentts_ok = importlib.util.find_spec("kittentts") is not None + except Exception: + kittentts_ok = False + if kittentts_ok: + tool_status.append(("Text-to-Speech (KittenTTS local)", True, None)) + else: + tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'")) else: tool_status.append(("Text-to-Speech (Edge TTS)", True, None)) @@ -901,6 +911,31 @@ def _install_neutts_deps() -> bool: return False +def _install_kittentts_deps() -> bool: + """Install KittenTTS dependencies with user approval. Returns True on success.""" + import subprocess + import sys + + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + print() + print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...") + print() + try: + subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + check=True, timeout=300, + ) + print_success("kittentts installed successfully") + return True + except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e: + print_error(f"Failed to install kittentts: {e}") + print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile") + return False + + def _setup_tts_provider(config: dict): """Interactive TTS provider selection with install flow for NeuTTS.""" tts_config = config.get("tts", {}) @@ -916,6 +951,7 @@ def _setup_tts_provider(config: dict): "mistral": "Mistral Voxtral TTS", "gemini": "Google Gemini TTS", "neutts": "NeuTTS", + "kittentts": "KittenTTS", } current_label = provider_labels.get(current_provider, current_provider) @@ -939,9 +975,10 @@ def _setup_tts_provider(config: dict): "Mistral Voxtral TTS (multilingual, native Opus, needs API key)", "Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)", "NeuTTS (local on-device, free, ~300MB model download)", + "KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)", ] ) - providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"]) + providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"]) choices.append(f"Keep current ({current_label})") keep_current_idx = len(choices) - 1 idx = prompt_choice("Select TTS provider:", choices, keep_current_idx) @@ -1060,6 +1097,29 @@ def _setup_tts_provider(config: dict): print_warning("No API key provided. Falling back to Edge TTS.") selected = "edge" + elif selected == "kittentts": + # Check if already installed + try: + import importlib.util + already_installed = importlib.util.find_spec("kittentts") is not None + except Exception: + already_installed = False + + if already_installed: + print_success("KittenTTS is already installed") + else: + print() + print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).") + print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + print() + if prompt_yes_no("Install KittenTTS now?", True): + if not _install_kittentts_deps(): + print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.") + selected = "edge" + else: + print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.") + selected = "edge" + # Save the selection if "tts" not in config: config["tts"] = {} diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index cb1f393713..24c5fde5fb 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -182,6 +182,14 @@ TOOL_CATEGORIES = { ], "tts_provider": "gemini", }, + { + "name": "KittenTTS", + "badge": "local · free", + "tag": "Lightweight local ONNX TTS (~25MB), no API key", + "env_vars": [], + "tts_provider": "kittentts", + "post_setup": "kittentts", + }, ], }, "web": { @@ -423,6 +431,36 @@ def _run_post_setup(post_setup_key: str): _print_warning(" Node.js not found. Install Camofox via Docker:") _print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser") + elif post_setup_key == "kittentts": + try: + __import__("kittentts") + _print_success(" kittentts is already installed") + return + except ImportError: + pass + import subprocess + _print_info(" Installing kittentts (~25-80MB model, CPU-only)...") + wheel_url = ( + "https://github.com/KittenML/KittenTTS/releases/download/" + "0.8.1/kittentts-0.8.1-py3-none-any.whl" + ) + try: + result = subprocess.run( + [sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"], + capture_output=True, text=True, timeout=300, + ) + if result.returncode == 0: + _print_success(" kittentts installed") + _print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo") + _print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)") + else: + _print_warning(" kittentts install failed:") + _print_info(f" {result.stderr.strip()[:300]}") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + except subprocess.TimeoutExpired: + _print_warning(" kittentts install timed out (>5min)") + _print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile") + elif post_setup_key == "rl_training": try: __import__("tinker_atropos") diff --git a/tests/tools/test_tts_kittentts.py b/tests/tools/test_tts_kittentts.py new file mode 100644 index 0000000000..ab841f59f4 --- /dev/null +++ b/tests/tools/test_tts_kittentts.py @@ -0,0 +1,198 @@ +"""Tests for the KittenTTS local provider in tools/tts_tool.py.""" + +import json +from unittest.mock import MagicMock, patch + +import numpy as np +import pytest + + +@pytest.fixture(autouse=True) +def clean_env(monkeypatch): + for key in ("HERMES_SESSION_PLATFORM",): + monkeypatch.delenv(key, raising=False) + + +@pytest.fixture(autouse=True) +def clear_kittentts_cache(): + """Reset the module-level model cache between tests.""" + from tools import tts_tool as _tt + _tt._kittentts_model_cache.clear() + yield + _tt._kittentts_model_cache.clear() + + +@pytest.fixture +def mock_kittentts_module(): + """Inject a fake kittentts + soundfile module that return stub objects.""" + fake_model = MagicMock() + # 24kHz float32 PCM at ~2s of silence + fake_model.generate.return_value = np.zeros(48000, dtype=np.float32) + fake_cls = MagicMock(return_value=fake_model) + fake_kittentts = MagicMock() + fake_kittentts.KittenTTS = fake_cls + + # Stub soundfile — the real package isn't installed in CI venv, and + # _generate_kittentts does `import soundfile as sf` at runtime. + fake_sf = MagicMock() + def _fake_write(path, audio, samplerate): + # Emulate writing a real file so downstream path checks succeed. + import pathlib + pathlib.Path(path).write_bytes(b"RIFF\x00\x00\x00\x00WAVEfmt fake") + fake_sf.write = _fake_write + + with patch.dict( + "sys.modules", + {"kittentts": fake_kittentts, "soundfile": fake_sf}, + ): + yield fake_model, fake_cls + + +class TestGenerateKittenTts: + def test_successful_wav_generation(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import _generate_kittentts + + fake_model, fake_cls = mock_kittentts_module + output_path = str(tmp_path / "test.wav") + result = _generate_kittentts("Hello world", output_path, {}) + + assert result == output_path + assert (tmp_path / "test.wav").exists() + fake_cls.assert_called_once() + fake_model.generate.assert_called_once() + + def test_config_passes_voice_speed_cleantext(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import _generate_kittentts + + fake_model, _ = mock_kittentts_module + config = { + "kittentts": { + "model": "KittenML/kitten-tts-mini-0.8", + "voice": "Luna", + "speed": 1.25, + "clean_text": False, + } + } + _generate_kittentts("Hi there", str(tmp_path / "out.wav"), config) + + call_kwargs = fake_model.generate.call_args.kwargs + assert call_kwargs["voice"] == "Luna" + assert call_kwargs["speed"] == 1.25 + assert call_kwargs["clean_text"] is False + + def test_default_model_and_voice(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import ( + DEFAULT_KITTENTTS_MODEL, + DEFAULT_KITTENTTS_VOICE, + _generate_kittentts, + ) + + fake_model, fake_cls = mock_kittentts_module + _generate_kittentts("Hi", str(tmp_path / "out.wav"), {}) + + fake_cls.assert_called_once_with(DEFAULT_KITTENTTS_MODEL) + assert fake_model.generate.call_args.kwargs["voice"] == DEFAULT_KITTENTTS_VOICE + + def test_model_is_cached_across_calls(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import _generate_kittentts + + _, fake_cls = mock_kittentts_module + _generate_kittentts("One", str(tmp_path / "a.wav"), {}) + _generate_kittentts("Two", str(tmp_path / "b.wav"), {}) + + # Same model name → class instantiated exactly once + assert fake_cls.call_count == 1 + + def test_different_models_are_cached_separately(self, tmp_path, mock_kittentts_module): + from tools.tts_tool import _generate_kittentts + + _, fake_cls = mock_kittentts_module + _generate_kittentts( + "A", str(tmp_path / "a.wav"), + {"kittentts": {"model": "KittenML/kitten-tts-nano-0.8-int8"}}, + ) + _generate_kittentts( + "B", str(tmp_path / "b.wav"), + {"kittentts": {"model": "KittenML/kitten-tts-mini-0.8"}}, + ) + + assert fake_cls.call_count == 2 + + def test_non_wav_extension_triggers_ffmpeg_conversion( + self, tmp_path, mock_kittentts_module, monkeypatch + ): + """Non-.wav output path causes WAV → target ffmpeg conversion.""" + from tools import tts_tool as _tt + + calls = [] + + def fake_shutil_which(cmd): + return "/usr/bin/ffmpeg" if cmd == "ffmpeg" else None + + def fake_run(cmd, check=False, timeout=None, **kw): + calls.append(cmd) + # Emulate ffmpeg writing the output file + import pathlib + out_path = cmd[-1] + pathlib.Path(out_path).write_bytes(b"fake-mp3-data") + return MagicMock(returncode=0) + + monkeypatch.setattr(_tt.shutil, "which", fake_shutil_which) + monkeypatch.setattr(_tt.subprocess, "run", fake_run) + + output_path = str(tmp_path / "test.mp3") + result = _tt._generate_kittentts("Hi", output_path, {}) + + assert result == output_path + assert len(calls) == 1 + assert calls[0][0] == "/usr/bin/ffmpeg" + + def test_missing_kittentts_raises_import_error(self, tmp_path, monkeypatch): + """When kittentts package is not installed, _import_kittentts raises.""" + import sys + monkeypatch.setitem(sys.modules, "kittentts", None) + from tools.tts_tool import _generate_kittentts + + with pytest.raises((ImportError, TypeError)): + _generate_kittentts("Hi", str(tmp_path / "out.wav"), {}) + + +class TestCheckKittenttsAvailable: + def test_reports_available_when_package_present(self, monkeypatch): + import importlib.util + from tools.tts_tool import _check_kittentts_available + + fake_spec = MagicMock() + monkeypatch.setattr( + importlib.util, "find_spec", + lambda name: fake_spec if name == "kittentts" else None, + ) + assert _check_kittentts_available() is True + + def test_reports_unavailable_when_package_missing(self, monkeypatch): + import importlib.util + from tools.tts_tool import _check_kittentts_available + + monkeypatch.setattr(importlib.util, "find_spec", lambda name: None) + assert _check_kittentts_available() is False + + +class TestDispatcherBranch: + def test_kittentts_not_installed_returns_helpful_error(self, monkeypatch, tmp_path): + """When provider=kittentts but package missing, return JSON error with setup hint.""" + import sys + monkeypatch.setitem(sys.modules, "kittentts", None) + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools.tts_tool import text_to_speech_tool + + # Write a config telling it to use kittentts + import yaml + (tmp_path / "config.yaml").write_text( + yaml.safe_dump({"tts": {"provider": "kittentts"}}) + ) + + result = json.loads(text_to_speech_tool(text="Hello")) + assert result["success"] is False + assert "kittentts" in result["error"].lower() + assert "hermes setup tts" in result["error"].lower() diff --git a/tools/tts_tool.py b/tools/tts_tool.py index fa5a8159cf..b83fa4d73e 100644 --- a/tools/tts_tool.py +++ b/tools/tts_tool.py @@ -703,6 +703,15 @@ def _check_neutts_available() -> bool: return False +def _check_kittentts_available() -> bool: + """Check if the kittentts engine is importable (installed locally).""" + try: + import importlib.util + return importlib.util.find_spec("kittentts") is not None + except Exception: + return False + + def _default_neutts_ref_audio() -> str: """Return path to the bundled default voice reference audio.""" return str(Path(__file__).parent / "neutts_samples" / "jo.wav") @@ -955,7 +964,8 @@ def text_to_speech_tool( return json.dumps({ "success": False, "error": "KittenTTS provider selected but 'kittentts' package not installed. " - "Run: pip install https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl" + "Run 'hermes setup tts' and choose KittenTTS, or install manually: " + "pip install https://github.com/KittenML/KittenTTS/releases/download/0.8.1/kittentts-0.8.1-py3-none-any.whl" }, ensure_ascii=False) logger.info("Generating speech with KittenTTS (local, ~25MB)...") _generate_kittentts(text, file_str, tts_config) @@ -1084,6 +1094,8 @@ def check_tts_requirements() -> bool: pass if _check_neutts_available(): return True + if _check_kittentts_available(): + return True return False diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md index 6f7fc89506..2bf6430ff7 100644 --- a/website/docs/user-guide/features/tts.md +++ b/website/docs/user-guide/features/tts.md @@ -14,7 +14,7 @@ If you have a paid [Nous Portal](https://portal.nousresearch.com) subscription, ## Text-to-Speech -Convert text to speech with eight providers: +Convert text to speech with nine providers: | Provider | Quality | Cost | API Key | |----------|---------|------|---------| @@ -25,7 +25,8 @@ Convert text to speech with eight providers: | **Mistral (Voxtral TTS)** | Excellent | Paid | `MISTRAL_API_KEY` | | **Google Gemini TTS** | Excellent | Free tier | `GEMINI_API_KEY` | | **xAI TTS** | Excellent | Paid | `XAI_API_KEY` | -| **NeuTTS** | Good | Free | None needed | +| **NeuTTS** | Good | Free (local) | None needed | +| **KittenTTS** | Good | Free (local) | None needed | ### Platform Delivery @@ -41,7 +42,7 @@ Convert text to speech with eight providers: ```yaml # In ~/.hermes/config.yaml tts: - provider: "edge" # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "xai" | "neutts" + provider: "edge" # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "gemini" | "xai" | "neutts" | "kittentts" speed: 1.0 # Global speed multiplier (provider-specific settings override this) edge: voice: "en-US-AriaNeural" # 322 voices, 74 languages @@ -77,6 +78,11 @@ tts: ref_text: '' model: neuphonic/neutts-air-q4-gguf device: cpu + kittentts: + model: KittenML/kitten-tts-nano-0.8-int8 # 25MB int8; also: kitten-tts-micro-0.8 (41MB), kitten-tts-mini-0.8 (80MB) + voice: Jasper # Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo + speed: 1.0 # 0.5 - 2.0 + clean_text: true # Expand numbers, currencies, units ``` **Speed control**: The global `tts.speed` value applies to all providers by default. Each provider can override it with its own `speed` setting (e.g., `tts.openai.speed: 1.5`). Provider-specific speed takes precedence over the global value. Default is `1.0` (normal speed). @@ -91,6 +97,7 @@ Telegram voice bubbles require Opus/OGG audio format: - **Google Gemini TTS** outputs raw PCM and uses **ffmpeg** to encode Opus directly for Telegram voice bubbles - **xAI TTS** outputs MP3 and needs **ffmpeg** to convert for Telegram voice bubbles - **NeuTTS** outputs WAV and also needs **ffmpeg** to convert for Telegram voice bubbles +- **KittenTTS** outputs WAV and also needs **ffmpeg** to convert for Telegram voice bubbles ```bash # Ubuntu/Debian @@ -103,7 +110,7 @@ brew install ffmpeg sudo dnf install ffmpeg ``` -Without ffmpeg, Edge TTS, MiniMax TTS, and NeuTTS audio are sent as regular audio files (playable, but shown as a rectangular player instead of a voice bubble). +Without ffmpeg, Edge TTS, MiniMax TTS, NeuTTS, and KittenTTS audio are sent as regular audio files (playable, but shown as a rectangular player instead of a voice bubble). :::tip If you want voice bubbles without installing ffmpeg, switch to the OpenAI, ElevenLabs, or Mistral provider. From 517f5e263953ab92c6076cd888ea755af106d6d4 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 00:44:45 -0700 Subject: [PATCH 18/63] chore(release): map abdi.moya@gmail.com -> AxDSan for release notes --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index efe32f2364..b880c48f61 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -103,6 +103,7 @@ AUTHOR_MAP = { "desaiaum08@gmail.com": "Aum08Desai", "shannon.sands.1979@gmail.com": "shannonsands", "shannon@nousresearch.com": "shannonsands", + "abdi.moya@gmail.com": "AxDSan", "eri@plasticlabs.ai": "Erosika", "hjcpuro@gmail.com": "hjc-puro", "xaydinoktay@gmail.com": "aydnOktay", From 9b36636363ddddc2ef8244449f9321dcc3224420 Mon Sep 17 00:00:00 2001 From: ifrederico Date: Fri, 20 Mar 2026 13:00:37 -0400 Subject: [PATCH 19/63] fix(security): apply file safety to copilot acp fs --- agent/copilot_acp_client.py | 34 ++++-- agent/file_safety.py | 111 +++++++++++++++++++ tests/agent/test_copilot_acp_client.py | 142 +++++++++++++++++++++++++ tools/file_operations.py | 68 ++---------- tools/file_tools.py | 24 ++--- 5 files changed, 295 insertions(+), 84 deletions(-) create mode 100644 agent/file_safety.py create mode 100644 tests/agent/test_copilot_acp_client.py diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 7a0d3dfd65..783f949567 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -21,6 +21,9 @@ from pathlib import Path from types import SimpleNamespace from typing import Any +from agent.file_safety import get_read_block_error, is_write_denied +from agent.redact import redact_sensitive_text + ACP_MARKER_BASE_URL = "acp://copilot" _DEFAULT_TIMEOUT_SECONDS = 900.0 @@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]: } +def _permission_denied(message_id: Any) -> dict[str, Any]: + return { + "jsonrpc": "2.0", + "id": message_id, + "result": { + "outcome": { + "outcome": "cancelled", + } + }, + } + + def _format_messages_as_prompt( messages: list[dict[str, Any]], model: str | None = None, @@ -535,18 +550,13 @@ class CopilotACPClient: params = msg.get("params") or {} if method == "session/request_permission": - response = { - "jsonrpc": "2.0", - "id": message_id, - "result": { - "outcome": { - "outcome": "allow_once", - } - }, - } + response = _permission_denied(message_id) elif method == "fs/read_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + block_error = get_read_block_error(str(path)) + if block_error: + raise PermissionError(block_error) content = path.read_text() if path.exists() else "" line = params.get("line") limit = params.get("limit") @@ -555,6 +565,8 @@ class CopilotACPClient: start = line - 1 end = start + limit if isinstance(limit, int) and limit > 0 else None content = "".join(lines[start:end]) + if content: + content = redact_sensitive_text(content) response = { "jsonrpc": "2.0", "id": message_id, @@ -567,6 +579,10 @@ class CopilotACPClient: elif method == "fs/write_text_file": try: path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + if is_write_denied(str(path)): + raise PermissionError( + f"Write denied: '{path}' is a protected system/credential file." + ) path.parent.mkdir(parents=True, exist_ok=True) path.write_text(str(params.get("content") or "")) response = { diff --git a/agent/file_safety.py b/agent/file_safety.py new file mode 100644 index 0000000000..09da46cafd --- /dev/null +++ b/agent/file_safety.py @@ -0,0 +1,111 @@ +"""Shared file safety rules used by both tools and ACP shims.""" + +from __future__ import annotations + +import os +from pathlib import Path +from typing import Optional + + +def _hermes_home_path() -> Path: + """Resolve the active HERMES_HOME (profile-aware) without circular imports.""" + try: + from hermes_constants import get_hermes_home # local import to avoid cycles + return get_hermes_home() + except Exception: + return Path(os.path.expanduser("~/.hermes")) + + +def build_write_denied_paths(home: str) -> set[str]: + """Return exact sensitive paths that must never be written.""" + hermes_home = _hermes_home_path() + return { + os.path.realpath(p) + for p in [ + os.path.join(home, ".ssh", "authorized_keys"), + os.path.join(home, ".ssh", "id_rsa"), + os.path.join(home, ".ssh", "id_ed25519"), + os.path.join(home, ".ssh", "config"), + str(hermes_home / ".env"), + os.path.join(home, ".bashrc"), + os.path.join(home, ".zshrc"), + os.path.join(home, ".profile"), + os.path.join(home, ".bash_profile"), + os.path.join(home, ".zprofile"), + os.path.join(home, ".netrc"), + os.path.join(home, ".pgpass"), + os.path.join(home, ".npmrc"), + os.path.join(home, ".pypirc"), + "/etc/sudoers", + "/etc/passwd", + "/etc/shadow", + ] + } + + +def build_write_denied_prefixes(home: str) -> list[str]: + """Return sensitive directory prefixes that must never be written.""" + return [ + os.path.realpath(p) + os.sep + for p in [ + os.path.join(home, ".ssh"), + os.path.join(home, ".aws"), + os.path.join(home, ".gnupg"), + os.path.join(home, ".kube"), + "/etc/sudoers.d", + "/etc/systemd", + os.path.join(home, ".docker"), + os.path.join(home, ".azure"), + os.path.join(home, ".config", "gh"), + ] + ] + + +def get_safe_write_root() -> Optional[str]: + """Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset.""" + root = os.getenv("HERMES_WRITE_SAFE_ROOT", "") + if not root: + return None + try: + return os.path.realpath(os.path.expanduser(root)) + except Exception: + return None + + +def is_write_denied(path: str) -> bool: + """Return True if path is blocked by the write denylist or safe root.""" + home = os.path.realpath(os.path.expanduser("~")) + resolved = os.path.realpath(os.path.expanduser(str(path))) + + if resolved in build_write_denied_paths(home): + return True + for prefix in build_write_denied_prefixes(home): + if resolved.startswith(prefix): + return True + + safe_root = get_safe_write_root() + if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)): + return True + + return False + + +def get_read_block_error(path: str) -> Optional[str]: + """Return an error message when a read targets internal Hermes cache files.""" + resolved = Path(path).expanduser().resolve() + hermes_home = _hermes_home_path().resolve() + blocked_dirs = [ + hermes_home / "skills" / ".hub" / "index-cache", + hermes_home / "skills" / ".hub", + ] + for blocked in blocked_dirs: + try: + resolved.relative_to(blocked) + except ValueError: + continue + return ( + f"Access denied: {path} is an internal Hermes cache file " + "and cannot be read directly to prevent prompt injection. " + "Use the skills_list or skill_view tools instead." + ) + return None diff --git a/tests/agent/test_copilot_acp_client.py b/tests/agent/test_copilot_acp_client.py new file mode 100644 index 0000000000..ce481a57b9 --- /dev/null +++ b/tests/agent/test_copilot_acp_client.py @@ -0,0 +1,142 @@ +"""Focused regressions for the Copilot ACP shim safety layer.""" + +from __future__ import annotations + +import io +import json +import os +import tempfile +import unittest +from pathlib import Path +from unittest.mock import patch + +from agent.copilot_acp_client import CopilotACPClient + + +class _FakeProcess: + def __init__(self) -> None: + self.stdin = io.StringIO() + + +class CopilotACPClientSafetyTests(unittest.TestCase): + def setUp(self) -> None: + self.client = CopilotACPClient(acp_cwd="/tmp") + + def _dispatch(self, message: dict, *, cwd: str) -> dict: + process = _FakeProcess() + handled = self.client._handle_server_message( + message, + process=process, + cwd=cwd, + text_parts=[], + reasoning_parts=[], + ) + self.assertTrue(handled) + payload = process.stdin.getvalue().strip() + self.assertTrue(payload) + return json.loads(payload) + + def test_request_permission_is_not_auto_allowed(self) -> None: + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 1, + "method": "session/request_permission", + "params": {}, + }, + cwd="/tmp", + ) + + outcome = (((response.get("result") or {}).get("outcome") or {}).get("outcome")) + self.assertEqual(outcome, "cancelled") + + def test_read_text_file_blocks_internal_hermes_hub_files(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + home = Path(tmpdir) / "home" + blocked = home / ".hermes" / "skills" / ".hub" / "index-cache" / "entry.json" + blocked.parent.mkdir(parents=True, exist_ok=True) + blocked.write_text('{"token":"sk-test-secret-1234567890"}') + + with patch.dict(os.environ, {"HOME": str(home)}, clear=False): + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 2, + "method": "fs/read_text_file", + "params": {"path": str(blocked)}, + }, + cwd=str(home), + ) + + self.assertIn("error", response) + + def test_read_text_file_redacts_sensitive_content(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + secret_file = root / "config.env" + secret_file.write_text("OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012") + + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 3, + "method": "fs/read_text_file", + "params": {"path": str(secret_file)}, + }, + cwd=str(root), + ) + + content = ((response.get("result") or {}).get("content") or "") + self.assertNotIn("abc123def456", content) + self.assertIn("OPENAI_API_KEY=", content) + + def test_write_text_file_reuses_write_denylist(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + home = Path(tmpdir) / "home" + target = home / ".ssh" / "id_rsa" + target.parent.mkdir(parents=True, exist_ok=True) + + with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True): + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 4, + "method": "fs/write_text_file", + "params": { + "path": str(target), + "content": "fake-private-key", + }, + }, + cwd=str(home), + ) + + self.assertIn("error", response) + self.assertFalse(target.exists()) + + def test_write_text_file_respects_safe_root(self) -> None: + with tempfile.TemporaryDirectory() as tmpdir: + root = Path(tmpdir) + safe_root = root / "workspace" + safe_root.mkdir() + outside = root / "outside.txt" + + with patch.dict(os.environ, {"HERMES_WRITE_SAFE_ROOT": str(safe_root)}, clear=False): + response = self._dispatch( + { + "jsonrpc": "2.0", + "id": 5, + "method": "fs/write_text_file", + "params": { + "path": str(outside), + "content": "should-not-write", + }, + }, + cwd=str(root), + ) + + self.assertIn("error", response) + self.assertFalse(outside.exists()) + + +if __name__ == "__main__": + unittest.main() diff --git a/tools/file_operations.py b/tools/file_operations.py index 8c3897bb2b..59070d7ce0 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -35,6 +35,13 @@ from pathlib import Path from hermes_constants import get_hermes_home from tools.binary_extensions import BINARY_EXTENSIONS +from agent.file_safety import ( + build_write_denied_paths, + build_write_denied_prefixes, + get_safe_write_root as _shared_get_safe_write_root, + is_write_denied as _shared_is_write_denied, +) + # --------------------------------------------------------------------------- # Write-path deny list — blocks writes to sensitive system/credential files @@ -42,41 +49,9 @@ from tools.binary_extensions import BINARY_EXTENSIONS _HOME = str(Path.home()) -WRITE_DENIED_PATHS = { - os.path.realpath(p) for p in [ - os.path.join(_HOME, ".ssh", "authorized_keys"), - os.path.join(_HOME, ".ssh", "id_rsa"), - os.path.join(_HOME, ".ssh", "id_ed25519"), - os.path.join(_HOME, ".ssh", "config"), - str(get_hermes_home() / ".env"), - os.path.join(_HOME, ".bashrc"), - os.path.join(_HOME, ".zshrc"), - os.path.join(_HOME, ".profile"), - os.path.join(_HOME, ".bash_profile"), - os.path.join(_HOME, ".zprofile"), - os.path.join(_HOME, ".netrc"), - os.path.join(_HOME, ".pgpass"), - os.path.join(_HOME, ".npmrc"), - os.path.join(_HOME, ".pypirc"), - "/etc/sudoers", - "/etc/passwd", - "/etc/shadow", - ] -} +WRITE_DENIED_PATHS = build_write_denied_paths(_HOME) -WRITE_DENIED_PREFIXES = [ - os.path.realpath(p) + os.sep for p in [ - os.path.join(_HOME, ".ssh"), - os.path.join(_HOME, ".aws"), - os.path.join(_HOME, ".gnupg"), - os.path.join(_HOME, ".kube"), - "/etc/sudoers.d", - "/etc/systemd", - os.path.join(_HOME, ".docker"), - os.path.join(_HOME, ".azure"), - os.path.join(_HOME, ".config", "gh"), - ] -] +WRITE_DENIED_PREFIXES = build_write_denied_prefixes(_HOME) def _get_safe_write_root() -> Optional[str]: @@ -87,33 +62,12 @@ def _get_safe_write_root() -> Optional[str]: not on the static deny list. Opt-in hardening for gateway/messaging deployments that should only touch a workspace checkout. """ - root = os.getenv("HERMES_WRITE_SAFE_ROOT", "") - if not root: - return None - try: - return os.path.realpath(os.path.expanduser(root)) - except Exception: - return None + return _shared_get_safe_write_root() def _is_write_denied(path: str) -> bool: """Return True if path is on the write deny list.""" - resolved = os.path.realpath(os.path.expanduser(str(path))) - - # 1) Static deny list - if resolved in WRITE_DENIED_PATHS: - return True - for prefix in WRITE_DENIED_PREFIXES: - if resolved.startswith(prefix): - return True - - # 2) Optional safe-root sandbox - safe_root = _get_safe_write_root() - if safe_root: - if not (resolved == safe_root or resolved.startswith(safe_root + os.sep)): - return True - - return False + return _shared_is_write_denied(path) # ============================================================================= diff --git a/tools/file_tools.py b/tools/file_tools.py index 3b2044c9da..af6701f823 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -7,6 +7,9 @@ import logging import os import threading from pathlib import Path +from typing import Optional + +from agent.file_safety import get_read_block_error from tools.binary_extensions import has_binary_extension from tools.file_operations import ShellFileOperations from agent.redact import redact_sensitive_text @@ -373,24 +376,9 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str = # ── Hermes internal path guard ──────────────────────────────── # Prevent prompt injection via catalog or hub metadata files. - from hermes_constants import get_hermes_home as _get_hh - _hermes_home = _get_hh().resolve() - _blocked_dirs = [ - _hermes_home / "skills" / ".hub" / "index-cache", - _hermes_home / "skills" / ".hub", - ] - for _blocked in _blocked_dirs: - try: - _resolved.relative_to(_blocked) - return json.dumps({ - "error": ( - f"Access denied: {path} is an internal Hermes cache file " - "and cannot be read directly to prevent prompt injection. " - "Use the skills_list or skill_view tools instead." - ) - }) - except ValueError: - pass + block_error = get_read_block_error(path) + if block_error: + return json.dumps({"error": block_error}) # ── Dedup check ─────────────────────────────────────────────── # If we already read this exact (path, offset, limit) and the From 9a655ff57b2f329d5b769b7e43b63dd118535e3c Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 00:36:12 -0700 Subject: [PATCH 20/63] =?UTF-8?q?chore(release):=20map=20fr@tecompanytea.c?= =?UTF-8?q?om=20=E2=86=92=20ifrederico?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index b880c48f61..fd2f3f1afb 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -98,6 +98,7 @@ AUTHOR_MAP = { "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", "dmayhem93@gmail.com": "dmahan93", + "fr@tecompanytea.com": "ifrederico", "cdanis@gmail.com": "cdanis", "samherring99@gmail.com": "samherring99", "desaiaum08@gmail.com": "Aum08Desai", From 71668559bed7cfea1d37b06031c1d79220c41a27 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 00:38:20 -0700 Subject: [PATCH 21/63] test(copilot-acp): patch HERMES_HOME alongside HOME in hub-block test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit file_safety now uses profile-aware get_hermes_home(), so the test fixture must override HERMES_HOME too — otherwise it resolves to the conftest's isolated tempdir and the hub-cache path doesn't match. --- tests/agent/test_copilot_acp_client.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/agent/test_copilot_acp_client.py b/tests/agent/test_copilot_acp_client.py index ce481a57b9..52ad20a350 100644 --- a/tests/agent/test_copilot_acp_client.py +++ b/tests/agent/test_copilot_acp_client.py @@ -57,7 +57,11 @@ class CopilotACPClientSafetyTests(unittest.TestCase): blocked.parent.mkdir(parents=True, exist_ok=True) blocked.write_text('{"token":"sk-test-secret-1234567890"}') - with patch.dict(os.environ, {"HOME": str(home)}, clear=False): + with patch.dict( + os.environ, + {"HOME": str(home), "HERMES_HOME": str(home / ".hermes")}, + clear=False, + ): response = self._dispatch( { "jsonrpc": "2.0", From 26abac5afd431685e55db1d7b2e12ecc4f3eb064 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 01:33:10 -0700 Subject: [PATCH 22/63] test(conftest): reset module-level state + unset platform allowlists (#13400) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Three fixes that close the remaining structural sources of CI flakes after PR #13363. ## 1. Per-test reset of module-level singletons and ContextVars Python modules are singletons per process, and pytest-xdist workers are long-lived. Module-level dicts/sets and ContextVars persist across tests on the same worker. A test that sets state in `tools.approval._session_approved` and doesn't explicitly clear it leaks that state to every subsequent test on the same worker. New `_reset_module_state` autouse fixture in `tests/conftest.py` clears: - tools.approval: _session_approved, _session_yolo, _permanent_approved, _pending, _gateway_queues, _gateway_notify_cbs, _approval_session_key - tools.interrupt: _interrupted_threads - gateway.session_context: 10 session/cron ContextVars (reset to _UNSET) - tools.env_passthrough: _allowed_env_vars_var (reset to empty set) - tools.credential_files: _registered_files_var (reset to empty dict) - tools.file_tools: _read_tracker, _file_ops_cache This was the single biggest remaining class of CI flakes. `test_command_guards::test_warn_session_approved` and `test_combined_cli_session_approves_both` were failing 12/15 recent main runs specifically because `_session_approved` carried approvals from a prior test's session into these tests' `"default"` session lookup. ## 2. Unset platform allowlist env vars in hermetic fixture `TELEGRAM_ALLOWED_USERS`, `DISCORD_ALLOWED_USERS`, and 20 other `*_ALLOWED_USERS` / `*_ALLOW_ALL_USERS` vars are now unset per-test in the same place credential env vars already are. These aren't credentials but they change gateway auth behavior; if set from any source (user shell, leaky test, CI env) they flake button-authorization tests. Fixes three `test_telegram_approval_buttons` tests that were failing across recent runs of the full gateway directory. ## 3. Two specific tests with module-level captured state - `test_signal::TestSignalPhoneRedaction`: `agent.redact._REDACT_ENABLED` is captured at module import from `HERMES_REDACT_SECRETS`, not read per-call. `monkeypatch.delenv` at test time is too late. Added `monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)` per skill xdist-cross-test-pollution Pattern 5. - `test_internal_event_bypass_pairing::test_non_internal_event_without_user_triggers_pairing`: `gateway.pairing.PAIRING_DIR` is captured at module import from HERMES_HOME, so per-test HERMES_HOME redirection in conftest doesn't retroactively move it. Test now monkeypatches PAIRING_DIR directly to its tmp_path, preventing rate-limit state from prior xdist workers from letting the pairing send-call be suppressed. ## Validation - tests/tools/: 3494 pass (0 fail) including test_command_guards - tests/gateway/: 3504 pass (0 fail) across repeat runs - tests/agent/ + tests/hermes_cli/ + tests/run_agent/ + tests/tools/: 8371 pass, 37 skipped, 0 fail — full suite across directories No production code changed. --- tests/conftest.py | 126 ++++++++++++++++++ .../test_internal_event_bypass_pairing.py | 9 ++ tests/gateway/test_signal.py | 6 + 3 files changed, 141 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index ca4a9a9709..0258e034f9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -186,6 +186,31 @@ _HERMES_BEHAVIORAL_VARS = frozenset({ "HERMES_HOME_MODE", "BROWSER_CDP_URL", "CAMOFOX_URL", + # Platform allowlists — not credentials, but if set from any source + # (user shell, earlier leaky test, CI env), they change gateway auth + # behavior and flake button-authorization tests. + "TELEGRAM_ALLOWED_USERS", + "DISCORD_ALLOWED_USERS", + "WHATSAPP_ALLOWED_USERS", + "SLACK_ALLOWED_USERS", + "SIGNAL_ALLOWED_USERS", + "SIGNAL_GROUP_ALLOWED_USERS", + "EMAIL_ALLOWED_USERS", + "SMS_ALLOWED_USERS", + "MATTERMOST_ALLOWED_USERS", + "MATRIX_ALLOWED_USERS", + "DINGTALK_ALLOWED_USERS", + "FEISHU_ALLOWED_USERS", + "WECOM_ALLOWED_USERS", + "GATEWAY_ALLOWED_USERS", + "GATEWAY_ALLOW_ALL_USERS", + "TELEGRAM_ALLOW_ALL_USERS", + "DISCORD_ALLOW_ALL_USERS", + "WHATSAPP_ALLOW_ALL_USERS", + "SLACK_ALLOW_ALL_USERS", + "SIGNAL_ALLOW_ALL_USERS", + "EMAIL_ALLOW_ALL_USERS", + "SMS_ALLOW_ALL_USERS", }) @@ -258,6 +283,107 @@ def _isolate_hermes_home(_hermetic_environment): return None +# ── Module-level state reset ─────────────────────────────────────────────── +# +# Python modules are singletons per process, and pytest-xdist workers are +# long-lived. Module-level dicts/sets (tool registries, approval state, +# interrupt flags) and ContextVars persist across tests in the same worker, +# causing tests that pass alone to fail when run with siblings. +# +# Each entry in this fixture clears state that belongs to a specific module. +# New state buckets go here too — this is the single gate that prevents +# "works alone, flakes in CI" bugs from state leakage. +# +# The skill `test-suite-cascade-diagnosis` documents the concrete patterns +# this closes; the running example was `test_command_guards` failing 12/15 +# CI runs because ``tools.approval._session_approved`` carried approvals +# from one test's session into another's. + +@pytest.fixture(autouse=True) +def _reset_module_state(): + """Clear module-level mutable state and ContextVars between tests. + + Keeps state from leaking across tests on the same xdist worker. Modules + that don't exist yet (test collection before production import) are + skipped silently — production import later creates fresh empty state. + """ + # --- tools.approval — the single biggest source of cross-test pollution --- + try: + from tools import approval as _approval_mod + _approval_mod._session_approved.clear() + _approval_mod._session_yolo.clear() + _approval_mod._permanent_approved.clear() + _approval_mod._pending.clear() + _approval_mod._gateway_queues.clear() + _approval_mod._gateway_notify_cbs.clear() + # ContextVar: reset to empty string so get_current_session_key() + # falls through to the env var / default path, matching a fresh + # process. + _approval_mod._approval_session_key.set("") + except Exception: + pass + + # --- tools.interrupt — per-thread interrupt flag set --- + try: + from tools import interrupt as _interrupt_mod + with _interrupt_mod._lock: + _interrupt_mod._interrupted_threads.clear() + except Exception: + pass + + # --- gateway.session_context — 9 ContextVars that represent + # the active gateway session. If set in one test and not reset, + # the next test's get_session_env() reads stale values. + try: + from gateway import session_context as _sc_mod + for _cv in ( + _sc_mod._SESSION_PLATFORM, + _sc_mod._SESSION_CHAT_ID, + _sc_mod._SESSION_CHAT_NAME, + _sc_mod._SESSION_THREAD_ID, + _sc_mod._SESSION_USER_ID, + _sc_mod._SESSION_USER_NAME, + _sc_mod._SESSION_KEY, + _sc_mod._CRON_AUTO_DELIVER_PLATFORM, + _sc_mod._CRON_AUTO_DELIVER_CHAT_ID, + _sc_mod._CRON_AUTO_DELIVER_THREAD_ID, + ): + _cv.set(_sc_mod._UNSET) + except Exception: + pass + + # --- tools.env_passthrough — ContextVar with no default --- + # LookupError is normal if the test never set it. Setting it to an + # empty set unconditionally normalizes the starting state. + try: + from tools import env_passthrough as _envp_mod + _envp_mod._allowed_env_vars_var.set(set()) + except Exception: + pass + + # --- tools.credential_files — ContextVar --- + try: + from tools import credential_files as _credf_mod + _credf_mod._registered_files_var.set({}) + except Exception: + pass + + # --- tools.file_tools — per-task read history + file-ops cache --- + # _read_tracker accumulates per-task_id read history for loop detection, + # capped by _READ_HISTORY_CAP. If entries from a prior test persist, the + # cap is hit faster than expected and capacity-related tests flake. + try: + from tools import file_tools as _ft_mod + with _ft_mod._read_tracker_lock: + _ft_mod._read_tracker.clear() + with _ft_mod._file_ops_lock: + _ft_mod._file_ops_cache.clear() + except Exception: + pass + + yield + + @pytest.fixture() def tmp_dir(tmp_path): """Provide a temporary directory that is cleaned up automatically.""" diff --git a/tests/gateway/test_internal_event_bypass_pairing.py b/tests/gateway/test_internal_event_bypass_pairing.py index d10195b2d5..8878842538 100644 --- a/tests/gateway/test_internal_event_bypass_pairing.py +++ b/tests/gateway/test_internal_event_bypass_pairing.py @@ -355,8 +355,17 @@ async def test_none_user_id_does_not_generate_pairing_code(monkeypatch, tmp_path async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path): """Verify the normal (non-internal) path still triggers pairing for unknown users.""" import gateway.run as gateway_run + import gateway.pairing as pairing_mod monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path) + # gateway.pairing.PAIRING_DIR is a module-level constant captured at + # import time from whichever HERMES_HOME was set then. Per-test + # HERMES_HOME redirection in conftest doesn't retroactively move it. + # Override directly so pairing rate-limit state lives in this test's + # tmp_path (and so stale state from prior xdist workers can't leak in). + pairing_dir = tmp_path / "pairing" + pairing_dir.mkdir() + monkeypatch.setattr(pairing_mod, "PAIRING_DIR", pairing_dir) (tmp_path / "config.yaml").write_text("", encoding="utf-8") # Clear env vars that could let all users through (loaded by diff --git a/tests/gateway/test_signal.py b/tests/gateway/test_signal.py index d7943b7f92..b51ec713f2 100644 --- a/tests/gateway/test_signal.py +++ b/tests/gateway/test_signal.py @@ -306,7 +306,13 @@ class TestSignalSessionSource: class TestSignalPhoneRedaction: @pytest.fixture(autouse=True) def _ensure_redaction_enabled(self, monkeypatch): + # agent.redact snapshots _REDACT_ENABLED at import time from the + # HERMES_REDACT_SECRETS env var. monkeypatch.delenv is too late — + # the module was already imported during test collection with + # whatever value was in the env then. Force the flag directly. + # See skill: xdist-cross-test-pollution Pattern 5. monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False) + monkeypatch.setattr("agent.redact._REDACT_ENABLED", True) def test_us_number(self): from agent.redact import redact_sensitive_text From b341b19fff3518e7ea48ba3ac8bedaf7fe07fb54 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 01:34:50 -0700 Subject: [PATCH 23/63] fix(auth): hermes auth remove sticks for shell-exported env vars (#13418) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Removing an env-seeded credential only cleared ~/.hermes/.env and the current process's os.environ, leaving shell-exported vars (shell profile, systemd EnvironmentFile, launchd plist) to resurrect the entry on the next load_pool() call. This matched the pre-#11485 codex behaviour. Now we suppress env: in auth.json on remove, gate _seed_from_env() behind is_source_suppressed(), clear env:* suppressions on auth add, and print a diagnostic pointing at the shell when the var lives there. Applies to every env:* seeded credential (xai, deepseek, moonshot, zai, nvidia, openrouter, anthropic, etc.), not just xai. Reported by @teknium1 from community user 'Artificial Brain' — couldn't remove their xAI key via hermes auth remove. --- agent/credential_pool.py | 14 ++ hermes_cli/auth_commands.py | 62 ++++++++- tests/hermes_cli/test_auth_commands.py | 174 +++++++++++++++++++++++++ 3 files changed, 248 insertions(+), 2 deletions(-) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index b02514e990..0d9776a397 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -1158,10 +1158,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]: changed = False active_sources: Set[str] = set() + # Honour user suppression — `hermes auth remove ` for an + # env-seeded credential marks the env: source as suppressed so it + # won't be re-seeded from the user's shell environment or ~/.hermes/.env. + # Without this gate the removal is silently undone on the next + # load_pool() call whenever the var is still exported by the shell. + try: + from hermes_cli.auth import is_source_suppressed as _is_source_suppressed + except ImportError: + def _is_source_suppressed(_p, _s): # type: ignore[misc] + return False if provider == "openrouter": token = os.getenv("OPENROUTER_API_KEY", "").strip() if token: source = "env:OPENROUTER_API_KEY" + if _is_source_suppressed(provider, source): + return changed, active_sources active_sources.add(source) changed |= _upsert_entry( entries, @@ -1198,6 +1210,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool if not token: continue source = f"env:{env_var}" + if _is_source_suppressed(provider, source): + continue active_sources.add(source) auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY base_url = env_url or pconfig.inference_base_url diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 30e5182949..4fe5f3f2e4 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -152,6 +152,22 @@ def auth_add_command(args) -> None: pool = load_pool(provider) + # Clear any env: suppressions for this provider — re-adding a + # credential is a strong signal the user wants auth for this provider + # re-enabled. Matches the Codex device_code re-link pattern below. + if not provider.startswith(CUSTOM_POOL_PREFIX): + try: + from hermes_cli.auth import ( + _load_auth_store, + unsuppress_credential_source, + ) + suppressed = _load_auth_store().get("suppressed_sources", {}) + for src in list(suppressed.get(provider, []) or []): + if src.startswith("env:"): + unsuppress_credential_source(provider, src) + except Exception: + pass + if requested_type == AUTH_TYPE_API_KEY: token = (getattr(args, "api_key", None) or "").strip() if not token: @@ -339,14 +355,56 @@ def auth_remove_command(args) -> None: print(f"Removed {provider} credential #{index} ({removed.label})") # If this was an env-seeded credential, also clear the env var from .env - # so it doesn't get re-seeded on the next load_pool() call. + # so it doesn't get re-seeded on the next load_pool() call. If the env + # var is also (or only) exported by the user's shell/systemd, .env + # cleanup alone is not enough — the next process to call load_pool() + # will re-read os.environ and resurrect the entry. Suppress the + # env: source so _seed_from_env() skips it, and tell the user + # where the shell-level copy is still living so they can remove it. if removed.source.startswith("env:"): + import os as _os env_var = removed.source[len("env:"):] if env_var: - from hermes_cli.config import remove_env_value + from hermes_cli.config import get_env_path, remove_env_value + from hermes_cli.auth import suppress_credential_source + + # Detect whether the var lives in .env, the shell env, or both, + # BEFORE remove_env_value() mutates os.environ. + env_in_process = bool(_os.getenv(env_var)) + env_in_dotenv = False + try: + env_path = get_env_path() + if env_path.exists(): + env_in_dotenv = any( + line.strip().startswith(f"{env_var}=") + for line in env_path.read_text(errors="replace").splitlines() + ) + except OSError: + pass + shell_exported = env_in_process and not env_in_dotenv + cleared = remove_env_value(env_var) if cleared: print(f"Cleared {env_var} from .env") + suppress_credential_source(provider, removed.source) + if shell_exported: + print( + f"Note: {env_var} is still set in your shell environment " + f"(not in ~/.hermes/.env)." + ) + print( + " Unset it there (shell profile, systemd EnvironmentFile, " + "launchd plist, etc.) or it will keep being visible to Hermes." + ) + print( + f" The pool entry is now suppressed — Hermes will ignore " + f"{env_var} until you run `hermes auth add {provider}`." + ) + else: + print( + f"Suppressed env:{env_var} — it will not be re-seeded even " + f"if the variable is re-exported later." + ) # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce), # clear the underlying auth store / credential file so it doesn't get diff --git a/tests/hermes_cli/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py index 5b0d9062b9..a017185573 100644 --- a/tests/hermes_cli/test_auth_commands.py +++ b/tests/hermes_cli/test_auth_commands.py @@ -1011,3 +1011,177 @@ def test_seed_from_singletons_respects_codex_suppression(tmp_path, monkeypatch): # Verify the auth store was NOT modified (no auto-import happened) after = json.loads((hermes_home / "auth.json").read_text()) assert "openai-codex" not in after.get("providers", {}) + + +def test_auth_remove_env_seeded_suppresses_shell_exported_var(tmp_path, monkeypatch, capsys): + """`hermes auth remove xai 1` must stick even when the env var is exported + by the shell (not written into ~/.hermes/.env). Before PR for #13371 the + removal silently restored on next load_pool() because _seed_from_env() + re-read os.environ. Now env: is suppressed in auth.json. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Simulate shell export (NOT written to .env) + monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export") + (hermes_home / ".env").write_text("") + + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "xai": [{ + "id": "env-1", + "label": "XAI_API_KEY", + "auth_type": "api_key", + "priority": 0, + "source": "env:XAI_API_KEY", + "access_token": "sk-xai-shell-export", + "base_url": "https://api.x.ai/v1", + }] + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth_commands import auth_remove_command + auth_remove_command(SimpleNamespace(provider="xai", target="1")) + + # Suppression marker written + after = json.loads((hermes_home / "auth.json").read_text()) + assert "env:XAI_API_KEY" in after.get("suppressed_sources", {}).get("xai", []) + + # Diagnostic printed pointing at the shell + out = capsys.readouterr().out + assert "still set in your shell environment" in out + assert "Cleared XAI_API_KEY from .env" not in out # wasn't in .env + + # Fresh simulation: shell re-exports, reload pool + monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export") + from agent.credential_pool import load_pool + pool = load_pool("xai") + assert not pool.has_credentials(), "pool must stay empty — env:XAI_API_KEY suppressed" + + +def test_auth_remove_env_seeded_dotenv_only_no_shell_hint(tmp_path, monkeypatch, capsys): + """When the env var lives only in ~/.hermes/.env (not the shell), the + shell-hint should NOT be printed — avoid scaring the user about a + non-existent shell export. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + # Key ONLY in .env, shell must not have it + monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False) + (hermes_home / ".env").write_text("DEEPSEEK_API_KEY=sk-ds-only\n") + # Mimic load_env() populating os.environ + monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-ds-only") + + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "deepseek": [{ + "id": "env-1", + "label": "DEEPSEEK_API_KEY", + "auth_type": "api_key", + "priority": 0, + "source": "env:DEEPSEEK_API_KEY", + "access_token": "sk-ds-only", + }] + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth_commands import auth_remove_command + auth_remove_command(SimpleNamespace(provider="deepseek", target="1")) + + out = capsys.readouterr().out + assert "Cleared DEEPSEEK_API_KEY from .env" in out + assert "still set in your shell environment" not in out + assert (hermes_home / ".env").read_text().strip() == "" + + +def test_auth_add_clears_env_suppression_for_provider(tmp_path, monkeypatch): + """Re-adding a credential via `hermes auth add ` clears any + env: suppression marker — strong signal the user wants auth back. + Matches the Codex device_code re-link behaviour. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.delenv("XAI_API_KEY", raising=False) + + _write_auth_store( + tmp_path, + { + "version": 1, + "providers": {}, + "suppressed_sources": {"xai": ["env:XAI_API_KEY"]}, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth import is_source_suppressed + from hermes_cli.auth_commands import auth_add_command + + assert is_source_suppressed("xai", "env:XAI_API_KEY") is True + auth_add_command(SimpleNamespace( + provider="xai", auth_type="api_key", + api_key="sk-xai-manual", label="manual", + )) + assert is_source_suppressed("xai", "env:XAI_API_KEY") is False + + +def test_seed_from_env_respects_env_suppression(tmp_path, monkeypatch): + """_seed_from_env() must skip env: sources that the user suppressed + via `hermes auth remove`. This is the gate that prevents shell-exported + keys from resurrecting removed credentials. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export") + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {"xai": ["env:XAI_API_KEY"]}, + })) + + from agent.credential_pool import _seed_from_env + + entries = [] + changed, active = _seed_from_env("xai", entries) + assert changed is False + assert entries == [] + assert active == set() + + +def test_seed_from_env_respects_openrouter_suppression(tmp_path, monkeypatch): + """OpenRouter is the special-case branch in _seed_from_env; verify it + honours suppression too. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-shell-export") + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {"openrouter": ["env:OPENROUTER_API_KEY"]}, + })) + + from agent.credential_pool import _seed_from_env + + entries = [] + changed, active = _seed_from_env("openrouter", entries) + assert changed is False + assert entries == [] + assert active == set() From d1ed6f4fb44c08e130f043ee3d1d12e7a3b8073e Mon Sep 17 00:00:00 2001 From: Franci Penov Date: Wed, 1 Apr 2026 09:12:44 -0700 Subject: [PATCH 24/63] feat(cli): add numbered keyboard shortcuts to approval and clarify prompts --- cli.py | 123 +++++++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 107 insertions(+), 16 deletions(-) diff --git a/cli.py b/cli.py index b5dc6c7c19..48af2c69fb 100644 --- a/cli.py +++ b/cli.py @@ -8016,8 +8016,18 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] for i, choice in enumerate(choices): label = choice_labels.get(choice, choice) - prefix = '❯ ' if i == selected else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' # No number for items beyond 10th + if i == selected: + prefix = f'❯ {num_prefix}. ' + else: + prefix = f' {num_prefix}. ' + for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Budget vertical space so HSplit never clips the command or choices. @@ -9192,6 +9202,29 @@ class HermesCLI: self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1) event.app.invalidate() + # Number keys for quick clarify selection (1-9, 0 for 10th item) + def _make_clarify_number_handler(idx): + def handler(event): + if self._clarify_state and not self._clarify_freetext: + choices = self._clarify_state.get("choices") or [] + # Map index to choice (treating "Other" as the last option) + if idx < len(choices): + # Select a numbered choice + self._clarify_state["response_queue"].put(choices[idx]) + self._clarify_state = None + self._clarify_freetext = False + event.app.invalidate() + elif idx == len(choices): + # Select "Other" option + self._clarify_freetext = True + event.app.invalidate() + return handler + + for _num in range(10): + # 1-9 select items 0-8, 0 selects item 9 (10thitem) + _idx = 9 if _num == 0 else _num - 1 + kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx)) + # --- Dangerous command approval: arrow-key navigation --- @kb.add('up', filter=Condition(lambda: bool(self._approval_state))) @@ -9233,6 +9266,20 @@ class HermesCLI: event.app.current_buffer.reset() event.app.invalidate() + # Number keys for quick approval selection (1-9, 0 for 10th item) + def _make_approval_number_handler(idx): + def handler(event): + if self._approval_state and idx < len(self._approval_state["choices"]): + self._approval_state["selected"] = idx + self._handle_approval_selection() + event.app.invalidate() + return handler + + for _num in range(10): + # 1-9 select items 0-8, 0 selects item 9 (10th item) + _idx = 9 if _num == 0 else _num - 1 + kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx)) + # --- History navigation: up/down browse history in normal input mode --- # The TextArea is multiline, so by default up/down only move the cursor. # Buffer.auto_up/auto_down handle both: cursor movement when multi-line, @@ -9801,14 +9848,32 @@ class HermesCLI: selected = state.get("selected", 0) preview_lines = _wrap_panel_text(question, 60) for i, choice in enumerate(choices): - prefix = "❯ " if i == selected and not cli_ref._clarify_freetext else " " - preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f"❯ {num_prefix}. " + else: + prefix = f" {num_prefix}. " + preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" ")) + # "Other" option in preview + other_num = len(choices) + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' other_label = ( - "❯ Other (type below)" if cli_ref._clarify_freetext - else "❯ Other (type your answer)" if selected == len(choices) - else " Other (type your answer)" + f"❯ {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext + else f"❯ {other_num_prefix}. Other (type your answer)" if selected == len(choices) + else f" {other_num_prefix}. Other (type your answer)" ) - preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) + preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" ")) box_width = _panel_box_width("Hermes needs your input", preview_lines) inner_text_width = max(8, box_width - 2) @@ -9816,18 +9881,35 @@ class HermesCLI: choice_wrapped: list[tuple[int, str]] = [] if choices: for i, choice in enumerate(choices): - prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else ' ' - for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): + # Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item) + if i < 9: + num_prefix = str(i + 1) + elif i == 9: + num_prefix = '0' + else: + num_prefix = ' ' + if i == selected and not cli_ref._clarify_freetext: + prefix = f'❯ {num_prefix}. ' + else: + prefix = f' {num_prefix}. ' + for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "): choice_wrapped.append((i, wrapped)) # Trailing Other row(s) other_idx = len(choices) - if selected == other_idx and not cli_ref._clarify_freetext: - other_label_mand = '❯ Other (type your answer)' - elif cli_ref._clarify_freetext: - other_label_mand = '❯ Other (type below)' + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' else: - other_label_mand = ' Other (type your answer)' - other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: + other_label_mand = f'❯ {other_num_prefix}. Other (type your answer)' + elif cli_ref._clarify_freetext: + other_label_mand = f'❯ {other_num_prefix}. Other (type below)' + else: + other_label_mand = f' {other_num_prefix}. Other (type your answer)' + other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ") elif cli_ref._clarify_freetext: # Freetext-only mode: the guidance line takes the place of choices. other_wrapped = _wrap_panel_text( @@ -9892,6 +9974,15 @@ class HermesCLI: # "Other" option (trailing row(s), only shown when choices exist) other_idx = len(choices) + # Calculate number prefix for "Other" option + other_num = other_idx + 1 + if other_num < 10: + other_num_prefix = str(other_num) + elif other_num == 10: + other_num_prefix = '0' + else: + other_num_prefix = ' ' + if selected == other_idx and not cli_ref._clarify_freetext: other_style = 'class:clarify-selected' elif cli_ref._clarify_freetext: From 65c2a6b27f6e3bf4441ed063fd96611eacf0aa88 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 00:52:03 -0700 Subject: [PATCH 25/63] chore(release): add francip to AUTHOR_MAP --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index fd2f3f1afb..481f2f4467 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -55,6 +55,7 @@ AUTHOR_MAP = { "185121704+stablegenius49@users.noreply.github.com": "stablegenius49", "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit", "valdi.jorge@gmail.com": "jvcl", + "francip@gmail.com": "francip", "oussama.redcode@gmail.com": "mavrickdeveloper", "126368201+vilkasdev@users.noreply.github.com": "vilkasdev", "137614867+cutepawss@users.noreply.github.com": "cutepawss", From e50e7f11bc80f72f9a4eaac217343d75f8ede677 Mon Sep 17 00:00:00 2001 From: Omni Comelse Date: Sun, 22 Mar 2026 14:57:22 +0100 Subject: [PATCH 26/63] feat(skills): add adversarial-ux-test optional skill Adds a structured adversarial UX testing skill that roleplays the worst-case user for any product. Uses a 6-step workflow: 1. Define a specific grumpy persona (age 50+, tech-resistant) 2. Browse the app in-character attempting real tasks 3. Write visceral in-character feedback (the Rant) 4. Apply a pragmatism filter (RED/YELLOW/WHITE/GREEN classification) 5. Create tickets only for real issues (RED + GREEN) 6. Deliver a structured report with screenshots The pragmatism filter is the key differentiator - it prevents raw persona complaints from becoming tickets, separating genuine UX problems from "I hate computers" noise. Includes example personas for 8 industry verticals and practical tips from real-world testing sessions. Ref: https://x.com/Teknium/status/2035708510034641202 --- optional-skills/dogfood/DESCRIPTION.md | 3 + .../dogfood/adversarial-ux-test/SKILL.md | 190 ++++++++++++++++++ 2 files changed, 193 insertions(+) create mode 100644 optional-skills/dogfood/DESCRIPTION.md create mode 100644 optional-skills/dogfood/adversarial-ux-test/SKILL.md diff --git a/optional-skills/dogfood/DESCRIPTION.md b/optional-skills/dogfood/DESCRIPTION.md new file mode 100644 index 0000000000..f083fd72bd --- /dev/null +++ b/optional-skills/dogfood/DESCRIPTION.md @@ -0,0 +1,3 @@ +# Dogfood — Advanced QA & Testing Skills + +Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses. diff --git a/optional-skills/dogfood/adversarial-ux-test/SKILL.md b/optional-skills/dogfood/adversarial-ux-test/SKILL.md new file mode 100644 index 0000000000..1777e083d1 --- /dev/null +++ b/optional-skills/dogfood/adversarial-ux-test/SKILL.md @@ -0,0 +1,190 @@ +--- +name: adversarial-ux-test +description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only. +version: 1.0.0 +author: Omni @ Comelse +license: MIT +metadata: + hermes: + tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing] + related_skills: [dogfood] +--- + +# Adversarial UX Test + +Roleplay the worst-case user for your product — the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise. + +Think of it as an automated "mom test" — but angry. + +## Why This Works + +Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. The adversarial persona catches: +- Confusing terminology that makes sense to developers but not users +- Too many steps to accomplish basic tasks +- Missing onboarding or "aha moments" +- Accessibility issues (font size, contrast, click targets) +- Cold-start problems (empty states, no demo content) +- Paywall/signup friction that kills conversion + +The **pragmatism filter** (Phase 3) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs. + +## How to Use + +Tell the agent: +``` +"Run an adversarial UX test on [URL]" +"Be a grumpy [persona type] and test [app name]" +"Do an asshole user test on my staging site" +``` + +You can provide a persona or let the agent generate one based on your product's target audience. + +## Step 1: Define the Persona + +If no persona is provided, generate one by answering: + +1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way") +2. **What is their tech comfort level?** (the lower the better — WhatsApp-only, paper notebooks, wife set up their email) +3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list) +4. **What would make them give up?** (too many clicks, jargon, slow, confusing) +5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing) + +### Good Persona Example +> **"Big Mick" McAllister** — 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords. + +### Bad Persona Example +> "A user who doesn't like the app" — too vague, no constraints, no voice. + +The persona must be **specific enough to stay in character** for 20 minutes of testing. + +## Step 2: Become the Asshole (Browse as the Persona) + +1. Read any available project docs for app context and URLs +2. **Fully inhabit the persona** — their frustrations, limitations, goals +3. Navigate to the app using browser tools +4. **Attempt the persona's ACTUAL TASKS** (not a feature tour): + - Can they do what they came to do? + - How many clicks/screens to accomplish it? + - What confuses them? + - What makes them angry? + - Where do they get lost? + - What would make them give up and go back to their old way? + +5. Test these friction categories: + - **First impression** — would they even bother past the landing page? + - **Core workflow** — the ONE thing they need to do most often + - **Error recovery** — what happens when they do something wrong? + - **Readability** — text size, contrast, information density + - **Speed** — does it feel faster than their current method? + - **Terminology** — any jargon they wouldn't understand? + - **Navigation** — can they find their way back? do they know where they are? + +6. Take screenshots of every pain point +7. Check browser console for JS errors on every page + +## Step 3: The Rant (Write Feedback in Character) + +Write the feedback AS THE PERSONA — in their voice, with their frustrations. This is not a bug report. This is a real human venting. + +``` +[PERSONA NAME]'s Review of [PRODUCT] + +Overall: [Would they keep using it? Yes/No/Maybe with conditions] + +THE GOOD (grudging admission): +- [things even they have to admit work] + +THE BAD (legitimate UX issues): +- [real problems that would stop them from using the product] + +THE UGLY (showstoppers): +- [things that would make them uninstall/cancel immediately] + +SPECIFIC COMPLAINTS: +1. [Page/feature]: "[quote in persona voice]" — [what happened, expected] +2. ... + +VERDICT: "[one-line persona quote summarizing their experience]" +``` + +## Step 4: The Pragmatism Filter (Critical — Do Not Skip) + +Step OUT of the persona. Evaluate each complaint as a product person: + +- **RED: REAL UX BUG** — Any user would have this problem, not just grumpy ones. Fix it. +- **YELLOW: VALID BUT LOW PRIORITY** — Real issue but only for extreme users. Note it. +- **WHITE: PERSONA NOISE** — "I hate computers" talking, not a product problem. Skip it. +- **GREEN: FEATURE REQUEST** — Good idea hidden in the complaint. Consider it. + +### Filter Criteria +1. Would a 35-year-old competent-but-busy user have the same complaint? → RED +2. Is this a genuine accessibility issue (font size, contrast, click targets)? → RED +3. Is this "I want it to work like paper" resistance to digital? → WHITE +4. Is this a real workflow inefficiency the persona stumbled on? → YELLOW or RED +5. Would fixing this add complexity for the 80% who are fine? → WHITE +6. Does the complaint reveal a missing onboarding moment? → GREEN + +**This filter is MANDATORY.** Never ship raw persona complaints as tickets. + +## Step 5: Create Tickets + +For **RED** and **GREEN** items only: +- Clear, actionable title +- Include the persona's verbatim quote (entertaining + memorable) +- The real UX issue underneath (objective) +- A suggested fix (actionable) +- Tag/label: "ux-review" + +For **YELLOW** items: one catch-all ticket with all notes. + +**WHITE** items appear in the report only. No tickets. + +**Max 10 tickets per session** — focus on the worst issues. + +## Step 6: Report + +Deliver: +1. The persona rant (Step 3) — entertaining and visceral +2. The filtered assessment (Step 4) — pragmatic and actionable +3. Tickets created (Step 5) — with links +4. Screenshots of key issues + +## Tips + +- **One persona per session.** Don't mix perspectives. +- **Stay in character during Steps 2-3.** Break character only at Step 4. +- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages. +- **Empty states are gold.** New user experience reveals the most friction. +- **The best findings are RED items the persona found accidentally** while trying to do something else. +- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways. +- **Run this before demos, launches, or after shipping a batch of features.** +- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts — the cold start experience is where most friction lives. +- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona. +- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding — it means the team knew about it but never felt the user's pain. +- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage. +- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level. + +## Example Personas by Industry + +These are starting points — customize for your specific product: + +| Product Type | Persona | Age | Key Trait | +|-------------|---------|-----|-----------| +| CRM | Retirement home director | 68 | Filing cabinet is the current CRM | +| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper | +| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups | +| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes | +| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions | +| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls | +| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer | +| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders | + +## Rules + +- Stay in character during Steps 2-3 +- Be genuinely mean but fair — find real problems, not manufactured ones +- The pragmatism filter (Step 4) is **MANDATORY** +- Screenshots required for every complaint +- Max 10 tickets per session +- Test on staging/deployed app, not local dev +- One persona, one session, one report From e0dc0a88d3218980bdcc888c2741a27ab332eff7 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 01:46:59 -0700 Subject: [PATCH 27/63] chore: attribution + catalog rows for adversarial-ux-test - AUTHOR_MAP: omni@comelse.com -> omnissiah-comelse - skills-catalog.md: add adversarial-ux-test row under dogfood - optional-skills-catalog.md: add new Dogfood section --- scripts/release.py | 1 + website/docs/reference/optional-skills-catalog.md | 6 ++++++ website/docs/reference/skills-catalog.md | 1 + 3 files changed, 8 insertions(+) diff --git a/scripts/release.py b/scripts/release.py index 481f2f4467..c8ceed0867 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -56,6 +56,7 @@ AUTHOR_MAP = { "101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit", "valdi.jorge@gmail.com": "jvcl", "francip@gmail.com": "francip", + "omni@comelse.com": "omnissiah-comelse", "oussama.redcode@gmail.com": "mavrickdeveloper", "126368201+vilkasdev@users.noreply.github.com": "vilkasdev", "137614867+cutepawss@users.noreply.github.com": "cutepawss", diff --git a/website/docs/reference/optional-skills-catalog.md b/website/docs/reference/optional-skills-catalog.md index 9cb1f386b8..ab48e036dd 100644 --- a/website/docs/reference/optional-skills-catalog.md +++ b/website/docs/reference/optional-skills-catalog.md @@ -58,6 +58,12 @@ hermes skills uninstall | **meme-generation** | Generate real meme images by picking a template and overlaying text with Pillow. Produces actual `.png` meme files. | | **touchdesigner-mcp** | Control a running TouchDesigner instance via the twozero MCP plugin — create operators, set parameters, wire connections, execute Python, build real-time audio-reactive visuals and GLSL networks. 36 native tools. | +## Dogfood + +| Skill | Description | +|-------|-------------| +| **adversarial-ux-test** | Roleplay the most difficult, tech-resistant user for a product — browse in-persona, rant, then filter through a RED/YELLOW/WHITE/GREEN pragmatism layer so only real UX friction becomes tickets. | + ## DevOps | Skill | Description | diff --git a/website/docs/reference/skills-catalog.md b/website/docs/reference/skills-catalog.md index 46c29929f9..301d7ee545 100644 --- a/website/docs/reference/skills-catalog.md +++ b/website/docs/reference/skills-catalog.md @@ -69,6 +69,7 @@ Internal dogfooding and QA skills used to test Hermes Agent itself. | Skill | Description | Path | |-------|-------------|------| | `dogfood` | Systematic exploratory QA testing of web applications — find bugs, capture evidence, and generate structured reports | `dogfood` | +| `adversarial-ux-test` | Roleplay the most difficult, tech-resistant user for a product — browse in-persona, rant, then filter through a RED/YELLOW/WHITE/GREEN pragmatism layer so only real UX friction becomes tickets. | `dogfood/adversarial-ux-test` | ## email From 2c69b3eca8187013223677985c013e714e5c1d70 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 01:52:49 -0700 Subject: [PATCH 28/63] =?UTF-8?q?fix(auth):=20unify=20credential=20source?= =?UTF-8?q?=20removal=20=E2=80=94=20every=20source=20sticks=20(#13427)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Every credential source Hermes reads from now behaves identically on `hermes auth remove`: the pool entry stays gone across fresh load_pool() calls, even when the underlying external state (env var, OAuth file, auth.json block, config entry) is still present. Before this, auth_remove_command was a 110-line if/elif with five special cases, and three more sources (qwen-cli, copilot, custom config) had no removal handler at all — their pool entries silently resurrected on the next invocation. Even the handled cases diverged: codex suppressed, anthropic deleted-without-suppressing, nous cleared without suppressing. Each new provider added a new gap. What's new: agent/credential_sources.py — RemovalStep registry, one entry per source (env, claude_code, hermes_pkce, nous device_code, codex device_code, qwen-cli, copilot gh_cli + env vars, custom config). auth_remove_command dispatches uniformly via find_removal_step(). Changes elsewhere: agent/credential_pool.py — every upsert in _seed_from_env, _seed_from_singletons, and _seed_custom_pool now gates on is_source_suppressed(provider, source) via a shared helper. hermes_cli/auth_commands.py — auth_remove_command reduced to 25 lines of dispatch; auth_add_command now clears ALL suppressions for the provider on re-add (was env:* only). Copilot is special: the same token is seeded twice (gh_cli via _seed_from_singletons + env: via _seed_from_env), so removing one entry without suppressing the other variants lets the duplicate resurrect. The copilot RemovalStep suppresses gh_cli + all three env variants (COPILOT_GITHUB_TOKEN, GH_TOKEN, GITHUB_TOKEN) at once. Tests: 11 new unit tests + 4059 existing pass. 12 E2E scenarios cover every source in isolated HERMES_HOME with simulated fresh processes. --- agent/credential_pool.py | 146 ++++----- agent/credential_sources.py | 401 +++++++++++++++++++++++++ hermes_cli/auth_commands.py | 136 ++------- tests/hermes_cli/test_auth_commands.py | 289 ++++++++++++++++++ 4 files changed, 793 insertions(+), 179 deletions(-) create mode 100644 agent/credential_sources.py diff --git a/agent/credential_pool.py b/agent/credential_pool.py index 0d9776a397..de8d03185a 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -983,6 +983,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup active_sources: Set[str] = set() auth_store = _load_auth_store() + # Shared suppression gate — used at every upsert site so + # `hermes auth remove ` is stable across all source types. + try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + if provider == "anthropic": # Only auto-discover external credentials (Claude Code, Hermes PKCE) # when the user has explicitly configured anthropic as their provider. @@ -1002,13 +1010,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup ("claude_code", read_claude_code_credentials()), ): if creds and creds.get("accessToken"): - # Check if user explicitly removed this source - try: - from hermes_cli.auth import is_source_suppressed - if is_source_suppressed(provider, source_name): - continue - except ImportError: - pass + if _is_suppressed(provider, source_name): + continue active_sources.add(source_name) changed |= _upsert_entry( entries, @@ -1026,7 +1029,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup elif provider == "nous": state = _load_provider_state(auth_store, "nous") - if state: + if state and not _is_suppressed(provider, "device_code"): active_sources.add("device_code") # Prefer a user-supplied label embedded in the singleton state # (set by persist_nous_credentials(label=...) when the user ran @@ -1067,20 +1070,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup token, source = resolve_copilot_token() if token: source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}" - active_sources.add(source_name) - pconfig = PROVIDER_REGISTRY.get(provider) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": token, - "base_url": pconfig.inference_base_url if pconfig else "", - "label": source, - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + pconfig = PROVIDER_REGISTRY.get(provider) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": token, + "base_url": pconfig.inference_base_url if pconfig else "", + "label": source, + }, + ) except Exception as exc: logger.debug("Copilot token seed failed: %s", exc) @@ -1096,20 +1100,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup token = creds.get("api_key", "") if token: source_name = creds.get("source", "qwen-cli") - active_sources.add(source_name) - changed |= _upsert_entry( - entries, - provider, - source_name, - { - "source": source_name, - "auth_type": AUTH_TYPE_OAUTH, - "access_token": token, - "expires_at_ms": creds.get("expires_at_ms"), - "base_url": creds.get("base_url", ""), - "label": creds.get("auth_file", source_name), - }, - ) + if not _is_suppressed(provider, source_name): + active_sources.add(source_name) + changed |= _upsert_entry( + entries, + provider, + source_name, + { + "source": source_name, + "auth_type": AUTH_TYPE_OAUTH, + "access_token": token, + "expires_at_ms": creds.get("expires_at_ms"), + "base_url": creds.get("base_url", ""), + "label": creds.get("auth_file", source_name), + }, + ) except Exception as exc: logger.debug("Qwen OAuth token seed failed: %s", exc) @@ -1118,13 +1123,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup # the device_code source as suppressed so it won't be re-seeded from # the Hermes auth store. Without this gate the removal is instantly # undone on the next load_pool() call. - codex_suppressed = False - try: - from hermes_cli.auth import is_source_suppressed - codex_suppressed = is_source_suppressed(provider, "device_code") - except ImportError: - pass - if codex_suppressed: + if _is_suppressed(provider, "device_code"): return changed, active_sources state = _load_provider_state(auth_store, "openai-codex") @@ -1256,6 +1255,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b changed = False active_sources: Set[str] = set() + # Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons. + try: + from hermes_cli.auth import is_source_suppressed as _is_suppressed + except ImportError: + def _is_suppressed(_p, _s): # type: ignore[misc] + return False + # Seed from the custom_providers config entry's api_key field cp_config = _get_custom_provider_config(pool_key) if cp_config: @@ -1264,19 +1270,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b name = str(cp_config.get("name") or "").strip() if api_key: source = f"config:{name}" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": api_key, - "base_url": base_url, - "label": name or source, - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": api_key, + "base_url": base_url, + "label": name or source, + }, + ) # Seed from model.api_key if model.provider=='custom' and model.base_url matches try: @@ -1296,19 +1303,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b matched_key = get_custom_provider_pool_key(model_base_url) if matched_key == pool_key: source = "model_config" - active_sources.add(source) - changed |= _upsert_entry( - entries, - pool_key, - source, - { - "source": source, - "auth_type": AUTH_TYPE_API_KEY, - "access_token": model_api_key, - "base_url": model_base_url, - "label": "model_config", - }, - ) + if not _is_suppressed(pool_key, source): + active_sources.add(source) + changed |= _upsert_entry( + entries, + pool_key, + source, + { + "source": source, + "auth_type": AUTH_TYPE_API_KEY, + "access_token": model_api_key, + "base_url": model_base_url, + "label": "model_config", + }, + ) except Exception: pass diff --git a/agent/credential_sources.py b/agent/credential_sources.py new file mode 100644 index 0000000000..8ad2fade0b --- /dev/null +++ b/agent/credential_sources.py @@ -0,0 +1,401 @@ +"""Unified removal contract for every credential source Hermes reads from. + +Hermes seeds its credential pool from many places: + + env: — os.environ / ~/.hermes/.env + claude_code — ~/.claude/.credentials.json + hermes_pkce — ~/.hermes/.anthropic_oauth.json + device_code — auth.json providers. (nous, openai-codex, ...) + qwen-cli — ~/.qwen/oauth_creds.json + gh_cli — gh auth token + config: — custom_providers config entry + model_config — model.api_key when model.provider == "custom" + manual — user ran `hermes auth add` + +Each source has its own reader inside ``agent.credential_pool._seed_from_*`` +(which keep their existing shape — we haven't restructured them). What we +unify here is **removal**: + + ``hermes auth remove `` must make the pool entry stay gone. + +Before this module, every source had an ad-hoc removal branch in +``auth_remove_command``, and several sources had no branch at all — so +``auth remove`` silently reverted on the next ``load_pool()`` call for +qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and +custom-config sources. + +Now every source registers a ``RemovalStep`` that does exactly three things +in the same shape: + + 1. Clean up whatever externally-readable state the source reads from + (.env line, auth.json block, OAuth file, etc.) + 2. Suppress the ``(provider, source_id)`` in auth.json so the + corresponding ``_seed_from_*`` branch skips the upsert on re-load + 3. Return ``RemovalResult`` describing what was cleaned and any + diagnostic hints the user should see (shell-exported env vars, + external credential files we deliberately don't delete, etc.) + +Adding a new credential source is: + - wire up a reader branch in ``_seed_from_*`` (existing pattern) + - gate that reader behind ``is_source_suppressed(provider, source_id)`` + - register a ``RemovalStep`` here + +No more per-source if/elif chain in ``auth_remove_command``. +""" + +from __future__ import annotations + +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import Callable, List, Optional + + +@dataclass +class RemovalResult: + """Outcome of removing a credential source. + + Attributes: + cleaned: Short strings describing external state that was actually + mutated (``"Cleared XAI_API_KEY from .env"``, + ``"Cleared openai-codex OAuth tokens from auth store"``). + Printed as plain lines to the user. + hints: Diagnostic lines ABOUT state the user may need to clean up + themselves or is deliberately left intact (shell-exported env + var, Claude Code credential file we don't delete, etc.). + Printed as plain lines to the user. Always non-destructive. + suppress: Whether to call ``suppress_credential_source`` after + cleanup so future ``load_pool`` calls skip this source. + Default True — almost every source needs this to stay sticky. + The only legitimate False is ``manual`` entries, which aren't + seeded from anywhere external. + """ + + cleaned: List[str] = field(default_factory=list) + hints: List[str] = field(default_factory=list) + suppress: bool = True + + +@dataclass +class RemovalStep: + """How to remove one specific credential source cleanly. + + Attributes: + provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...). + Special value ``"*"`` means "matches any provider" — used for + sources like ``manual`` that aren't provider-specific. + source_id: Source identifier as it appears in + ``PooledCredential.source``. May be a literal (``"claude_code"``) + or a prefix pattern matched via ``match_fn``. + match_fn: Optional predicate overriding literal ``source_id`` + matching. Gets the removed entry's source string. Used for + ``env:*`` (any env-seeded key), ``config:*`` (any custom + pool), and ``manual:*`` (any manual-source variant). + remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the + actual cleanup and returns what happened for the user. + description: One-line human-readable description for docs / tests. + """ + + provider: str + source_id: str + remove_fn: Callable[..., RemovalResult] + match_fn: Optional[Callable[[str], bool]] = None + description: str = "" + + def matches(self, provider: str, source: str) -> bool: + if self.provider != "*" and self.provider != provider: + return False + if self.match_fn is not None: + return self.match_fn(source) + return source == self.source_id + + +_REGISTRY: List[RemovalStep] = [] + + +def register(step: RemovalStep) -> RemovalStep: + _REGISTRY.append(step) + return step + + +def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]: + """Return the first matching RemovalStep, or None if unregistered. + + Unregistered sources fall through to the default remove path in + ``auth_remove_command``: the pool entry is already gone (that happens + before dispatch), no external cleanup, no suppression. This is the + correct behaviour for ``manual`` entries — they were only ever stored + in the pool, nothing external to clean up. + """ + for step in _REGISTRY: + if step.matches(provider, source): + return step + return None + + +# --------------------------------------------------------------------------- +# Individual RemovalStep implementations — one per source. +# --------------------------------------------------------------------------- +# Each remove_fn is intentionally small and single-purpose. Adding a new +# credential source means adding ONE entry here — no other changes to +# auth_remove_command. + + +def _remove_env_source(provider: str, removed) -> RemovalResult: + """env: — the most common case. + + Handles three user situations: + 1. Var lives only in ~/.hermes/.env → clear it + 2. Var lives only in the user's shell (shell profile, systemd + EnvironmentFile, launchd plist) → hint them where to unset it + 3. Var lives in both → clear from .env, hint about shell + """ + from hermes_cli.config import get_env_path, remove_env_value + + result = RemovalResult() + env_var = removed.source[len("env:"):] + if not env_var: + return result + + # Detect shell vs .env BEFORE remove_env_value pops os.environ. + env_in_process = bool(os.getenv(env_var)) + env_in_dotenv = False + try: + env_path = get_env_path() + if env_path.exists(): + env_in_dotenv = any( + line.strip().startswith(f"{env_var}=") + for line in env_path.read_text(errors="replace").splitlines() + ) + except OSError: + pass + shell_exported = env_in_process and not env_in_dotenv + + cleared = remove_env_value(env_var) + if cleared: + result.cleaned.append(f"Cleared {env_var} from .env") + + if shell_exported: + result.hints.extend([ + f"Note: {env_var} is still set in your shell environment " + f"(not in ~/.hermes/.env).", + " Unset it there (shell profile, systemd EnvironmentFile, " + "launchd plist, etc.) or it will keep being visible to Hermes.", + f" The pool entry is now suppressed — Hermes will ignore " + f"{env_var} until you run `hermes auth add {provider}`.", + ]) + else: + result.hints.append( + f"Suppressed env:{env_var} — it will not be re-seeded even " + f"if the variable is re-exported later." + ) + return result + + +def _remove_claude_code(provider: str, removed) -> RemovalResult: + """~/.claude/.credentials.json is owned by Claude Code itself. + + We don't delete it — the user's Claude Code install still needs to + work. We just suppress it so Hermes stops reading it. + """ + return RemovalResult(hints=[ + "Suppressed claude_code credential — it will not be re-seeded.", + "Note: Claude Code credentials still live in ~/.claude/.credentials.json", + "Run `hermes auth add anthropic` to re-enable if needed.", + ]) + + +def _remove_hermes_pkce(provider: str, removed) -> RemovalResult: + """~/.hermes/.anthropic_oauth.json is ours — delete it outright.""" + from hermes_constants import get_hermes_home + + result = RemovalResult() + oauth_file = get_hermes_home() / ".anthropic_oauth.json" + if oauth_file.exists(): + try: + oauth_file.unlink() + result.cleaned.append("Cleared Hermes Anthropic OAuth credentials") + except OSError as exc: + result.hints.append(f"Could not delete {oauth_file}: {exc}") + return result + + +def _clear_auth_store_provider(provider: str) -> bool: + """Delete auth_store.providers[provider]. Returns True if deleted.""" + from hermes_cli.auth import ( + _auth_store_lock, + _load_auth_store, + _save_auth_store, + ) + + with _auth_store_lock(): + auth_store = _load_auth_store() + providers_dict = auth_store.get("providers") + if isinstance(providers_dict, dict) and provider in providers_dict: + del providers_dict[provider] + _save_auth_store(auth_store) + return True + return False + + +def _remove_nous_device_code(provider: str, removed) -> RemovalResult: + """Nous OAuth lives in auth.json providers.nous — clear it and suppress. + + We suppress in addition to clearing because nothing else stops the + user's next `hermes login` run from writing providers.nous again + before they decide to. Suppression forces them to go through + `hermes auth add nous` to re-engage, which is the documented re-add + path and clears the suppression atomically. + """ + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + return result + + +def _remove_codex_device_code(provider: str, removed) -> RemovalResult: + """Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json. + + refresh_codex_oauth_pure() writes both every time, so clearing only + the Hermes auth store is not enough — _seed_from_singletons() would + re-import from ~/.codex/auth.json on the next load_pool() call and + the removal would be instantly undone. We suppress instead of + deleting Codex CLI's file, so the Codex CLI itself keeps working. + + The canonical source name in ``_seed_from_singletons`` is + ``"device_code"`` (no prefix). Entries may show up in the pool as + either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added + via ``hermes auth add openai-codex``), but in both cases the re-seed + gate lives at the ``"device_code"`` suppression key. We suppress + that canonical key here; the central dispatcher also suppresses + ``removed.source`` which is fine — belt-and-suspenders, idempotent. + """ + from hermes_cli.auth import suppress_credential_source + + result = RemovalResult() + if _clear_auth_store_provider(provider): + result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store") + # Suppress the canonical re-seed source, not just whatever source the + # removed entry had. Otherwise `manual:device_code` removals wouldn't + # block the `device_code` re-seed path. + suppress_credential_source(provider, "device_code") + result.hints.extend([ + "Suppressed openai-codex device_code source — it will not be re-seeded.", + "Note: Codex CLI credentials still live in ~/.codex/auth.json", + "Run `hermes auth add openai-codex` to re-enable if needed.", + ]) + return result + + +def _remove_qwen_cli(provider: str, removed) -> RemovalResult: + """~/.qwen/oauth_creds.json is owned by the Qwen CLI. + + Same pattern as claude_code — suppress, don't delete. The user's + Qwen CLI install still reads from that file. + """ + return RemovalResult(hints=[ + "Suppressed qwen-cli credential — it will not be re-seeded.", + "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json", + "Run `hermes auth add qwen-oauth` to re-enable if needed.", + ]) + + +def _remove_copilot_gh(provider: str, removed) -> RemovalResult: + """Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN. + + Copilot is special: the same token can be seeded as multiple source + entries (gh_cli from ``_seed_from_singletons`` plus env: from + ``_seed_from_env``), so removing one entry without suppressing the + others lets the duplicates resurrect. We suppress ALL known copilot + sources here so removal is stable regardless of which entry the + user clicked. + + We don't touch the user's gh CLI or shell state — just suppress so + Hermes stops picking the token up. + """ + # Suppress ALL copilot source variants up-front so no path resurrects + # the pool entry. The central dispatcher in auth_remove_command will + # ALSO suppress removed.source, but it's idempotent so double-calling + # is harmless. + from hermes_cli.auth import suppress_credential_source + suppress_credential_source(provider, "gh_cli") + for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"): + suppress_credential_source(provider, f"env:{env_var}") + + return RemovalResult(hints=[ + "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.", + "Note: Your gh CLI / shell environment is unchanged.", + "Run `hermes auth add copilot` to re-enable if needed.", + ]) + + +def _remove_custom_config(provider: str, removed) -> RemovalResult: + """Custom provider pools are seeded from custom_providers config or + model.api_key. Both are in config.yaml — modifying that from here + is more invasive than suppression. We suppress; the user can edit + config.yaml if they want to remove the key from disk entirely. + """ + source_label = removed.source + return RemovalResult(hints=[ + f"Suppressed {source_label} — it will not be re-seeded.", + "Note: The underlying value in config.yaml is unchanged. Edit it " + "directly if you want to remove the credential from disk.", + ]) + + +def _register_all_sources() -> None: + """Called once on module import. + + ORDER MATTERS — ``find_removal_step`` returns the first match. Put + provider-specific steps before the generic ``env:*`` step so that e.g. + copilot's ``env:GH_TOKEN`` goes through the copilot removal (which + doesn't touch the user's shell), not the generic env-var removal + (which would try to clear .env). + """ + register(RemovalStep( + provider="copilot", source_id="gh_cli", + match_fn=lambda src: src == "gh_cli" or src.startswith("env:"), + remove_fn=_remove_copilot_gh, + description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN", + )) + register(RemovalStep( + provider="*", source_id="env:", + match_fn=lambda src: src.startswith("env:"), + remove_fn=_remove_env_source, + description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)", + )) + register(RemovalStep( + provider="anthropic", source_id="claude_code", + remove_fn=_remove_claude_code, + description="~/.claude/.credentials.json", + )) + register(RemovalStep( + provider="anthropic", source_id="hermes_pkce", + remove_fn=_remove_hermes_pkce, + description="~/.hermes/.anthropic_oauth.json", + )) + register(RemovalStep( + provider="nous", source_id="device_code", + remove_fn=_remove_nous_device_code, + description="auth.json providers.nous", + )) + register(RemovalStep( + provider="openai-codex", source_id="device_code", + match_fn=lambda src: src == "device_code" or src.endswith(":device_code"), + remove_fn=_remove_codex_device_code, + description="auth.json providers.openai-codex + ~/.codex/auth.json", + )) + register(RemovalStep( + provider="qwen-oauth", source_id="qwen-cli", + remove_fn=_remove_qwen_cli, + description="~/.qwen/oauth_creds.json", + )) + register(RemovalStep( + provider="*", source_id="config:", + match_fn=lambda src: src.startswith("config:") or src == "model_config", + remove_fn=_remove_custom_config, + description="Custom provider config.yaml api_key field", + )) + + +_register_all_sources() diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 4fe5f3f2e4..9c33200107 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -152,9 +152,11 @@ def auth_add_command(args) -> None: pool = load_pool(provider) - # Clear any env: suppressions for this provider — re-adding a - # credential is a strong signal the user wants auth for this provider - # re-enabled. Matches the Codex device_code re-link pattern below. + # Clear ALL suppressions for this provider — re-adding a credential is + # a strong signal the user wants auth re-enabled. This covers env:* + # (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli, + # device_code (codex), etc. One consistent re-engagement pattern. + # Matches the Codex device_code re-link pattern that predates this. if not provider.startswith(CUSTOM_POOL_PREFIX): try: from hermes_cli.auth import ( @@ -163,8 +165,7 @@ def auth_add_command(args) -> None: ) suppressed = _load_auth_store().get("suppressed_sources", {}) for src in list(suppressed.get(provider, []) or []): - if src.startswith("env:"): - unsuppress_credential_source(provider, src) + unsuppress_credential_source(provider, src) except Exception: pass @@ -354,113 +355,28 @@ def auth_remove_command(args) -> None: raise SystemExit(f'No credential matching "{target}" for provider {provider}.') print(f"Removed {provider} credential #{index} ({removed.label})") - # If this was an env-seeded credential, also clear the env var from .env - # so it doesn't get re-seeded on the next load_pool() call. If the env - # var is also (or only) exported by the user's shell/systemd, .env - # cleanup alone is not enough — the next process to call load_pool() - # will re-read os.environ and resurrect the entry. Suppress the - # env: source so _seed_from_env() skips it, and tell the user - # where the shell-level copy is still living so they can remove it. - if removed.source.startswith("env:"): - import os as _os - env_var = removed.source[len("env:"):] - if env_var: - from hermes_cli.config import get_env_path, remove_env_value - from hermes_cli.auth import suppress_credential_source + # Unified removal dispatch. Every credential source Hermes reads from + # (env vars, external OAuth files, auth.json blocks, custom config) + # has a RemovalStep registered in agent.credential_sources. The step + # handles its source-specific cleanup and we centralise suppression + + # user-facing output here so every source behaves identically from + # the user's perspective. + from agent.credential_sources import find_removal_step + from hermes_cli.auth import suppress_credential_source - # Detect whether the var lives in .env, the shell env, or both, - # BEFORE remove_env_value() mutates os.environ. - env_in_process = bool(_os.getenv(env_var)) - env_in_dotenv = False - try: - env_path = get_env_path() - if env_path.exists(): - env_in_dotenv = any( - line.strip().startswith(f"{env_var}=") - for line in env_path.read_text(errors="replace").splitlines() - ) - except OSError: - pass - shell_exported = env_in_process and not env_in_dotenv + step = find_removal_step(provider, removed.source) + if step is None: + # Unregistered source — e.g. "manual", which has nothing external + # to clean up. The pool entry is already gone; we're done. + return - cleared = remove_env_value(env_var) - if cleared: - print(f"Cleared {env_var} from .env") - suppress_credential_source(provider, removed.source) - if shell_exported: - print( - f"Note: {env_var} is still set in your shell environment " - f"(not in ~/.hermes/.env)." - ) - print( - " Unset it there (shell profile, systemd EnvironmentFile, " - "launchd plist, etc.) or it will keep being visible to Hermes." - ) - print( - f" The pool entry is now suppressed — Hermes will ignore " - f"{env_var} until you run `hermes auth add {provider}`." - ) - else: - print( - f"Suppressed env:{env_var} — it will not be re-seeded even " - f"if the variable is re-exported later." - ) - - # If this was a singleton-seeded credential (OAuth device_code, hermes_pkce), - # clear the underlying auth store / credential file so it doesn't get - # re-seeded on the next load_pool() call. - elif provider == "openai-codex" and ( - removed.source == "device_code" or removed.source.endswith(":device_code") - ): - # Codex tokens live in TWO places: the Hermes auth store and - # ~/.codex/auth.json (the Codex CLI shared file). On every refresh, - # refresh_codex_oauth_pure() writes to both. So clearing only the - # Hermes auth store is not enough — _seed_from_singletons() will - # auto-import from ~/.codex/auth.json on the next load_pool() and - # the removal is instantly undone. Mark the source as suppressed - # so auto-import is skipped; leave ~/.codex/auth.json untouched so - # the Codex CLI itself keeps working. - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - suppress_credential_source, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - suppress_credential_source(provider, "device_code") - print("Suppressed openai-codex device_code source — it will not be re-seeded.") - print("Note: Codex CLI credentials still live in ~/.codex/auth.json") - print("Run `hermes auth add openai-codex` to re-enable if needed.") - - elif removed.source == "device_code" and provider == "nous": - from hermes_cli.auth import ( - _load_auth_store, _save_auth_store, _auth_store_lock, - ) - with _auth_store_lock(): - auth_store = _load_auth_store() - providers_dict = auth_store.get("providers") - if isinstance(providers_dict, dict) and provider in providers_dict: - del providers_dict[provider] - _save_auth_store(auth_store) - print(f"Cleared {provider} OAuth tokens from auth store") - - elif removed.source == "hermes_pkce" and provider == "anthropic": - from hermes_constants import get_hermes_home - oauth_file = get_hermes_home() / ".anthropic_oauth.json" - if oauth_file.exists(): - oauth_file.unlink() - print("Cleared Hermes Anthropic OAuth credentials") - - elif removed.source == "claude_code" and provider == "anthropic": - from hermes_cli.auth import suppress_credential_source - suppress_credential_source(provider, "claude_code") - print("Suppressed claude_code credential — it will not be re-seeded.") - print("Note: Claude Code credentials still live in ~/.claude/.credentials.json") - print("Run `hermes auth add anthropic` to re-enable if needed.") + result = step.remove_fn(provider, removed) + for line in result.cleaned: + print(line) + if result.suppress: + suppress_credential_source(provider, removed.source) + for line in result.hints: + print(line) def auth_reset_command(args) -> None: diff --git a/tests/hermes_cli/test_auth_commands.py b/tests/hermes_cli/test_auth_commands.py index a017185573..fb749b6ae7 100644 --- a/tests/hermes_cli/test_auth_commands.py +++ b/tests/hermes_cli/test_auth_commands.py @@ -1185,3 +1185,292 @@ def test_seed_from_env_respects_openrouter_suppression(tmp_path, monkeypatch): assert changed is False assert entries == [] assert active == set() + + +# ============================================================================= +# Unified credential-source stickiness — every source Hermes reads from has a +# registered RemovalStep in agent.credential_sources, and every seeding path +# gates on is_source_suppressed. Below: one test per source proving remove +# sticks across a fresh load_pool() call. +# ============================================================================= + + +def test_seed_from_singletons_respects_nous_suppression(tmp_path, monkeypatch): + """nous device_code must not re-seed from auth.json when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {"nous": {"access_token": "tok", "refresh_token": "r", "expires_at": 9999999999}}, + "suppressed_sources": {"nous": ["device_code"]}, + })) + + from agent.credential_pool import _seed_from_singletons + entries = [] + changed, active = _seed_from_singletons("nous", entries) + assert changed is False + assert entries == [] + assert active == set() + + +def test_seed_from_singletons_respects_copilot_suppression(tmp_path, monkeypatch): + """copilot gh_cli must not re-seed when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {"copilot": ["gh_cli"]}, + })) + + # Stub resolve_copilot_token to return a live token + import hermes_cli.copilot_auth as ca + monkeypatch.setattr(ca, "resolve_copilot_token", lambda: ("ghp_fake", "gh auth token")) + + from agent.credential_pool import _seed_from_singletons + entries = [] + changed, active = _seed_from_singletons("copilot", entries) + assert changed is False + assert entries == [] + assert active == set() + + +def test_seed_from_singletons_respects_qwen_suppression(tmp_path, monkeypatch): + """qwen-oauth qwen-cli must not re-seed from ~/.qwen/oauth_creds.json when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {"qwen-oauth": ["qwen-cli"]}, + })) + + import hermes_cli.auth as ha + monkeypatch.setattr(ha, "resolve_qwen_runtime_credentials", lambda **kw: { + "api_key": "tok", "source": "qwen-cli", "base_url": "https://q", + }) + + from agent.credential_pool import _seed_from_singletons + entries = [] + changed, active = _seed_from_singletons("qwen-oauth", entries) + assert changed is False + assert entries == [] + assert active == set() + + +def test_seed_from_singletons_respects_hermes_pkce_suppression(tmp_path, monkeypatch): + """anthropic hermes_pkce must not re-seed from ~/.hermes/.anthropic_oauth.json when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + import yaml + (hermes_home / "config.yaml").write_text(yaml.dump({"model": {"provider": "anthropic", "model": "claude"}})) + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {"anthropic": ["hermes_pkce"]}, + })) + + # Stub the readers so only hermes_pkce is "available"; claude_code returns None + import agent.anthropic_adapter as aa + monkeypatch.setattr(aa, "read_hermes_oauth_credentials", lambda: { + "accessToken": "tok", "refreshToken": "r", "expiresAt": 9999999999000, + }) + monkeypatch.setattr(aa, "read_claude_code_credentials", lambda: None) + + from agent.credential_pool import _seed_from_singletons + entries = [] + changed, active = _seed_from_singletons("anthropic", entries) + # hermes_pkce suppressed, claude_code returns None → nothing should be seeded + assert entries == [] + assert "hermes_pkce" not in active + + +def test_seed_custom_pool_respects_config_suppression(tmp_path, monkeypatch): + """Custom provider config: source must not re-seed when suppressed.""" + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + import yaml + (hermes_home / "config.yaml").write_text(yaml.dump({ + "model": {}, + "custom_providers": [ + {"name": "my", "base_url": "https://c.example.com", "api_key": "sk-custom"}, + ], + })) + + from agent.credential_pool import _seed_custom_pool, get_custom_provider_pool_key + pool_key = get_custom_provider_pool_key("https://c.example.com") + + (hermes_home / "auth.json").write_text(json.dumps({ + "version": 1, + "providers": {}, + "suppressed_sources": {pool_key: ["config:my"]}, + })) + + entries = [] + changed, active = _seed_custom_pool(pool_key, entries) + assert changed is False + assert entries == [] + assert "config:my" not in active + + +def test_credential_sources_registry_has_expected_steps(): + """Sanity check — the registry contains the expected RemovalSteps. + + Guards against accidentally dropping a step during future refactors. + If you add a new credential source, add it to the expected set below. + """ + from agent.credential_sources import _REGISTRY + + descriptions = {step.description for step in _REGISTRY} + expected = { + "gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN", + "Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)", + "~/.claude/.credentials.json", + "~/.hermes/.anthropic_oauth.json", + "auth.json providers.nous", + "auth.json providers.openai-codex + ~/.codex/auth.json", + "~/.qwen/oauth_creds.json", + "Custom provider config.yaml api_key field", + } + assert descriptions == expected, f"Registry mismatch. Got: {descriptions}" + + +def test_credential_sources_find_step_returns_none_for_manual(): + """Manual entries have nothing external to clean up — no step registered.""" + from agent.credential_sources import find_removal_step + assert find_removal_step("openrouter", "manual") is None + assert find_removal_step("xai", "manual") is None + + +def test_credential_sources_find_step_copilot_before_generic_env(tmp_path, monkeypatch): + """copilot env:GH_TOKEN must dispatch to the copilot step, not the + generic env-var step. The copilot step handles the duplicate-source + problem (same token seeded as both gh_cli and env:); the generic + env step would only suppress one of the variants. + """ + from agent.credential_sources import find_removal_step + + step = find_removal_step("copilot", "env:GH_TOKEN") + assert step is not None + assert "copilot" in step.description.lower() or "gh" in step.description.lower() + + # Generic step still matches any other provider's env var + step = find_removal_step("xai", "env:XAI_API_KEY") + assert step is not None + assert "env-seeded" in step.description.lower() + + +def test_auth_remove_copilot_suppresses_all_variants(tmp_path, monkeypatch): + """Removing any copilot source must suppress gh_cli + all env:* variants + so the duplicate-seed paths don't resurrect the credential. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + _write_auth_store( + tmp_path, + { + "version": 1, + "credential_pool": { + "copilot": [{ + "id": "c1", + "label": "gh auth token", + "auth_type": "api_key", + "priority": 0, + "source": "gh_cli", + "access_token": "ghp_fake", + }] + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth import is_source_suppressed + from hermes_cli.auth_commands import auth_remove_command + + auth_remove_command(SimpleNamespace(provider="copilot", target="1")) + + assert is_source_suppressed("copilot", "gh_cli") + assert is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN") + assert is_source_suppressed("copilot", "env:GH_TOKEN") + assert is_source_suppressed("copilot", "env:GITHUB_TOKEN") + + +def test_auth_add_clears_all_suppressions_including_non_env(tmp_path, monkeypatch): + """Re-adding a credential via `hermes auth add ` clears ALL + suppression markers for the provider, not just env:*. This matches + the single "re-engage" semantic — the user wants auth back, period. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + _write_auth_store( + tmp_path, + { + "version": 1, + "providers": {}, + "suppressed_sources": { + "copilot": ["gh_cli", "env:GH_TOKEN", "env:COPILOT_GITHUB_TOKEN"], + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth import is_source_suppressed + from hermes_cli.auth_commands import auth_add_command + + auth_add_command(SimpleNamespace( + provider="copilot", auth_type="api_key", + api_key="ghp-manual", label="m", + )) + + assert not is_source_suppressed("copilot", "gh_cli") + assert not is_source_suppressed("copilot", "env:GH_TOKEN") + assert not is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN") + + +def test_auth_remove_codex_manual_device_code_suppresses_canonical(tmp_path, monkeypatch): + """Removing a manual:device_code entry (from `hermes auth add openai-codex`) + must suppress the canonical ``device_code`` key, not ``manual:device_code``. + The re-seed gate in _seed_from_singletons checks ``device_code``. + """ + hermes_home = tmp_path / "hermes" + hermes_home.mkdir(parents=True, exist_ok=True) + monkeypatch.setenv("HERMES_HOME", str(hermes_home)) + + _write_auth_store( + tmp_path, + { + "version": 1, + "providers": {"openai-codex": {"tokens": {"access_token": "t", "refresh_token": "r"}}}, + "credential_pool": { + "openai-codex": [{ + "id": "cdx", + "label": "manual-codex", + "auth_type": "oauth", + "priority": 0, + "source": "manual:device_code", + "access_token": "t", + }] + }, + }, + ) + + from types import SimpleNamespace + from hermes_cli.auth import is_source_suppressed + from hermes_cli.auth_commands import auth_remove_command + + auth_remove_command(SimpleNamespace(provider="openai-codex", target="1")) + assert is_source_suppressed("openai-codex", "device_code") From 8a11b0a204c20705725696818e2298a6182ff891 Mon Sep 17 00:00:00 2001 From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> Date: Tue, 21 Apr 2026 01:54:02 -0700 Subject: [PATCH 29/63] feat(account-usage): add per-provider account limits module Ports agent/account_usage.py and its tests from the original PR #2486 branch. Defines AccountUsageSnapshot / AccountUsageWindow dataclasses, a shared renderer, and provider-specific fetchers for OpenAI Codex (wham/usage), Anthropic OAuth (oauth/usage), and OpenRouter (/credits and /key). Wiring into /usage lands in a follow-up salvage commit. Authored-by: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com> --- agent/account_usage.py | 326 ++++++++++++++++++++++++++++++++++++ tests/test_account_usage.py | 203 ++++++++++++++++++++++ 2 files changed, 529 insertions(+) create mode 100644 agent/account_usage.py create mode 100644 tests/test_account_usage.py diff --git a/agent/account_usage.py b/agent/account_usage.py new file mode 100644 index 0000000000..0e9562dcc9 --- /dev/null +++ b/agent/account_usage.py @@ -0,0 +1,326 @@ +from __future__ import annotations + +from dataclasses import dataclass +from datetime import datetime, timezone +from typing import Any, Optional + +import httpx + +from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token +from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials +from hermes_cli.runtime_provider import resolve_runtime_provider + + +def _utc_now() -> datetime: + return datetime.now(timezone.utc) + + +@dataclass(frozen=True) +class AccountUsageWindow: + label: str + used_percent: Optional[float] = None + reset_at: Optional[datetime] = None + detail: Optional[str] = None + + +@dataclass(frozen=True) +class AccountUsageSnapshot: + provider: str + source: str + fetched_at: datetime + title: str = "Account limits" + plan: Optional[str] = None + windows: tuple[AccountUsageWindow, ...] = () + details: tuple[str, ...] = () + unavailable_reason: Optional[str] = None + + @property + def available(self) -> bool: + return bool(self.windows or self.details) and not self.unavailable_reason + + +def _title_case_slug(value: Optional[str]) -> Optional[str]: + cleaned = str(value or "").strip() + if not cleaned: + return None + return cleaned.replace("_", " ").replace("-", " ").title() + + +def _parse_dt(value: Any) -> Optional[datetime]: + if value in (None, ""): + return None + if isinstance(value, (int, float)): + return datetime.fromtimestamp(float(value), tz=timezone.utc) + if isinstance(value, str): + text = value.strip() + if not text: + return None + if text.endswith("Z"): + text = text[:-1] + "+00:00" + try: + dt = datetime.fromisoformat(text) + return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc) + except ValueError: + return None + return None + + +def _format_reset(dt: Optional[datetime]) -> str: + if not dt: + return "unknown" + local_dt = dt.astimezone() + delta = dt - _utc_now() + total_seconds = int(delta.total_seconds()) + if total_seconds <= 0: + return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})" + hours, rem = divmod(total_seconds, 3600) + minutes = rem // 60 + if hours >= 24: + days, hours = divmod(hours, 24) + rel = f"in {days}d {hours}h" + elif hours > 0: + rel = f"in {hours}h {minutes}m" + else: + rel = f"in {minutes}m" + return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})" + + +def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]: + if not snapshot: + return [] + header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}" + lines = [header] + if snapshot.plan: + lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})") + else: + lines.append(f"Provider: {snapshot.provider}") + for window in snapshot.windows: + if window.used_percent is None: + base = f"{window.label}: unavailable" + else: + remaining = max(0, round(100 - float(window.used_percent))) + used = max(0, round(float(window.used_percent))) + base = f"{window.label}: {remaining}% remaining ({used}% used)" + if window.reset_at: + base += f" • resets {_format_reset(window.reset_at)}" + elif window.detail: + base += f" • {window.detail}" + lines.append(base) + for detail in snapshot.details: + lines.append(detail) + if snapshot.unavailable_reason: + lines.append(f"Unavailable: {snapshot.unavailable_reason}") + return lines + + +def _resolve_codex_usage_url(base_url: str) -> str: + normalized = (base_url or "").strip().rstrip("/") + if not normalized: + normalized = "https://chatgpt.com/backend-api/codex" + if normalized.endswith("/codex"): + normalized = normalized[: -len("/codex")] + if "/backend-api" in normalized: + return normalized + "/wham/usage" + return normalized + "/api/codex/usage" + + +def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]: + creds = resolve_codex_runtime_credentials(refresh_if_expiring=True) + token_data = _read_codex_tokens() + tokens = token_data.get("tokens") or {} + account_id = str(tokens.get("account_id", "") or "").strip() or None + headers = { + "Authorization": f"Bearer {creds['api_key']}", + "Accept": "application/json", + "User-Agent": "codex-cli", + } + if account_id: + headers["ChatGPT-Account-Id"] = account_id + with httpx.Client(timeout=15.0) as client: + response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers) + response.raise_for_status() + payload = response.json() or {} + rate_limit = payload.get("rate_limit") or {} + windows: list[AccountUsageWindow] = [] + for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")): + window = rate_limit.get(key) or {} + used = window.get("used_percent") + if used is None: + continue + windows.append( + AccountUsageWindow( + label=label, + used_percent=float(used), + reset_at=_parse_dt(window.get("reset_at")), + ) + ) + details: list[str] = [] + credits = payload.get("credits") or {} + if credits.get("has_credits"): + balance = credits.get("balance") + if isinstance(balance, (int, float)): + details.append(f"Credits balance: ${float(balance):.2f}") + elif credits.get("unlimited"): + details.append("Credits balance: unlimited") + return AccountUsageSnapshot( + provider="openai-codex", + source="usage_api", + fetched_at=_utc_now(), + plan=_title_case_slug(payload.get("plan_type")), + windows=tuple(windows), + details=tuple(details), + ) + + +def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]: + token = (resolve_anthropic_token() or "").strip() + if not token: + return None + if not _is_oauth_token(token): + return AccountUsageSnapshot( + provider="anthropic", + source="oauth_usage_api", + fetched_at=_utc_now(), + unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.", + ) + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + "Content-Type": "application/json", + "anthropic-beta": "oauth-2025-04-20", + "User-Agent": "claude-code/2.1.0", + } + with httpx.Client(timeout=15.0) as client: + response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers) + response.raise_for_status() + payload = response.json() or {} + windows: list[AccountUsageWindow] = [] + mapping = ( + ("five_hour", "Current session"), + ("seven_day", "Current week"), + ("seven_day_opus", "Opus week"), + ("seven_day_sonnet", "Sonnet week"), + ) + for key, label in mapping: + window = payload.get(key) or {} + util = window.get("utilization") + if util is None: + continue + used = float(util) * 100 if float(util) <= 1 else float(util) + windows.append( + AccountUsageWindow( + label=label, + used_percent=used, + reset_at=_parse_dt(window.get("resets_at")), + ) + ) + details: list[str] = [] + extra = payload.get("extra_usage") or {} + if extra.get("is_enabled"): + used_credits = extra.get("used_credits") + monthly_limit = extra.get("monthly_limit") + currency = extra.get("currency") or "USD" + if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)): + details.append( + f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}" + ) + return AccountUsageSnapshot( + provider="anthropic", + source="oauth_usage_api", + fetched_at=_utc_now(), + windows=tuple(windows), + details=tuple(details), + ) + + +def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]: + runtime = resolve_runtime_provider( + requested="openrouter", + explicit_base_url=base_url, + explicit_api_key=api_key, + ) + token = str(runtime.get("api_key", "") or "").strip() + if not token: + return None + normalized = str(runtime.get("base_url", "") or "").rstrip("/") + credits_url = f"{normalized}/credits" + key_url = f"{normalized}/key" + headers = { + "Authorization": f"Bearer {token}", + "Accept": "application/json", + } + with httpx.Client(timeout=10.0) as client: + credits_resp = client.get(credits_url, headers=headers) + credits_resp.raise_for_status() + credits = (credits_resp.json() or {}).get("data") or {} + try: + key_resp = client.get(key_url, headers=headers) + key_resp.raise_for_status() + key_data = (key_resp.json() or {}).get("data") or {} + except Exception: + key_data = {} + total_credits = float(credits.get("total_credits") or 0.0) + total_usage = float(credits.get("total_usage") or 0.0) + details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"] + windows: list[AccountUsageWindow] = [] + limit = key_data.get("limit") + limit_remaining = key_data.get("limit_remaining") + limit_reset = str(key_data.get("limit_reset") or "").strip() + usage = key_data.get("usage") + if ( + isinstance(limit, (int, float)) + and float(limit) > 0 + and isinstance(limit_remaining, (int, float)) + and 0 <= float(limit_remaining) <= float(limit) + ): + limit_value = float(limit) + remaining_value = float(limit_remaining) + used_percent = ((limit_value - remaining_value) / limit_value) * 100 + detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"] + if limit_reset: + detail_parts.append(f"resets {limit_reset}") + windows.append( + AccountUsageWindow( + label="API key quota", + used_percent=used_percent, + detail=" • ".join(detail_parts), + ) + ) + if isinstance(usage, (int, float)): + usage_parts = [f"API key usage: ${float(usage):.2f} total"] + for value, label in ( + (key_data.get("usage_daily"), "today"), + (key_data.get("usage_weekly"), "this week"), + (key_data.get("usage_monthly"), "this month"), + ): + if isinstance(value, (int, float)) and float(value) > 0: + usage_parts.append(f"${float(value):.2f} {label}") + details.append(" • ".join(usage_parts)) + return AccountUsageSnapshot( + provider="openrouter", + source="credits_api", + fetched_at=_utc_now(), + windows=tuple(windows), + details=tuple(details), + ) + + +def fetch_account_usage( + provider: Optional[str], + *, + base_url: Optional[str] = None, + api_key: Optional[str] = None, +) -> Optional[AccountUsageSnapshot]: + normalized = str(provider or "").strip().lower() + if normalized in {"", "auto", "custom"}: + return None + try: + if normalized == "openai-codex": + return _fetch_codex_account_usage() + if normalized == "anthropic": + return _fetch_anthropic_account_usage() + if normalized == "openrouter": + return _fetch_openrouter_account_usage(base_url, api_key) + except Exception: + return None + return None diff --git a/tests/test_account_usage.py b/tests/test_account_usage.py new file mode 100644 index 0000000000..072dc21c6f --- /dev/null +++ b/tests/test_account_usage.py @@ -0,0 +1,203 @@ +from datetime import datetime, timezone + +from agent.account_usage import ( + AccountUsageSnapshot, + AccountUsageWindow, + fetch_account_usage, + render_account_usage_lines, +) + + +class _Response: + def __init__(self, payload, status_code=200): + self._payload = payload + self.status_code = status_code + + def raise_for_status(self): + if self.status_code >= 400: + raise RuntimeError(f"HTTP {self.status_code}") + + def json(self): + return self._payload + + +class _Client: + def __init__(self, payload): + self._payload = payload + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def get(self, url, headers=None): + return _Response(self._payload) + + +class _RoutingClient: + def __init__(self, payloads): + self._payloads = payloads + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def get(self, url, headers=None): + return _Response(self._payloads[url]) + + +def test_fetch_account_usage_codex(monkeypatch): + monkeypatch.setattr( + "agent.account_usage.resolve_codex_runtime_credentials", + lambda refresh_if_expiring=True: { + "provider": "openai-codex", + "base_url": "https://chatgpt.com/backend-api/codex", + "api_key": "access-token", + }, + ) + monkeypatch.setattr( + "agent.account_usage._read_codex_tokens", + lambda: {"tokens": {"account_id": "acct_123"}}, + ) + monkeypatch.setattr( + "agent.account_usage.httpx.Client", + lambda timeout=15.0: _Client( + { + "plan_type": "pro", + "rate_limit": { + "primary_window": { + "used_percent": 15, + "reset_at": 1_900_000_000, + "limit_window_seconds": 18000, + }, + "secondary_window": { + "used_percent": 40, + "reset_at": 1_900_500_000, + "limit_window_seconds": 604800, + }, + }, + "credits": {"has_credits": True, "balance": 12.5}, + } + ), + ) + + snapshot = fetch_account_usage("openai-codex") + + assert snapshot is not None + assert snapshot.plan == "Pro" + assert len(snapshot.windows) == 2 + assert snapshot.windows[0].label == "Session" + assert snapshot.windows[0].used_percent == 15.0 + assert snapshot.windows[0].reset_at == datetime.fromtimestamp(1_900_000_000, tz=timezone.utc) + assert "Credits balance: $12.50" in snapshot.details + + +def test_render_account_usage_lines_includes_reset_and_provider(): + snapshot = AccountUsageSnapshot( + provider="openai-codex", + source="usage_api", + fetched_at=datetime.now(timezone.utc), + plan="Pro", + windows=( + AccountUsageWindow( + label="Session", + used_percent=25, + reset_at=datetime.now(timezone.utc), + ), + ), + details=("Credits balance: $9.99",), + ) + lines = render_account_usage_lines(snapshot) + + assert lines[0] == "📈 Account limits" + assert "openai-codex (Pro)" in lines[1] + assert "Session: 75% remaining (25% used)" in lines[2] + assert "Credits balance: $9.99" in lines[3] + + +def test_fetch_account_usage_openrouter_uses_limit_remaining_and_ignores_deprecated_rate_limit(monkeypatch): + monkeypatch.setattr( + "agent.account_usage.resolve_runtime_provider", + lambda requested, explicit_base_url=None, explicit_api_key=None: { + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-test", + }, + ) + monkeypatch.setattr( + "agent.account_usage.httpx.Client", + lambda timeout=10.0: _RoutingClient( + { + "https://openrouter.ai/api/v1/credits": { + "data": {"total_credits": 300.0, "total_usage": 10.92} + }, + "https://openrouter.ai/api/v1/key": { + "data": { + "limit": 100.0, + "limit_remaining": 70.0, + "limit_reset": "monthly", + "usage": 12.5, + "usage_daily": 0.5, + "usage_weekly": 2.0, + "usage_monthly": 8.0, + "rate_limit": {"requests": -1, "interval": "10s"}, + } + }, + } + ), + ) + + snapshot = fetch_account_usage("openrouter") + + assert snapshot is not None + assert snapshot.windows == ( + AccountUsageWindow( + label="API key quota", + used_percent=30.0, + detail="$70.00 of $100.00 remaining • resets monthly", + ), + ) + assert "Credits balance: $289.08" in snapshot.details + assert "API key usage: $12.50 total • $0.50 today • $2.00 this week • $8.00 this month" in snapshot.details + assert all("-1 requests / 10s" not in line for line in render_account_usage_lines(snapshot)) + + +def test_fetch_account_usage_openrouter_omits_quota_window_when_key_has_no_limit(monkeypatch): + monkeypatch.setattr( + "agent.account_usage.resolve_runtime_provider", + lambda requested, explicit_base_url=None, explicit_api_key=None: { + "provider": "openrouter", + "base_url": "https://openrouter.ai/api/v1", + "api_key": "sk-test", + }, + ) + monkeypatch.setattr( + "agent.account_usage.httpx.Client", + lambda timeout=10.0: _RoutingClient( + { + "https://openrouter.ai/api/v1/credits": { + "data": {"total_credits": 100.0, "total_usage": 25.5} + }, + "https://openrouter.ai/api/v1/key": { + "data": { + "limit": None, + "limit_remaining": None, + "usage": 25.5, + "usage_daily": 1.25, + "usage_weekly": 4.5, + "usage_monthly": 18.0, + } + }, + } + ), + ) + + snapshot = fetch_account_usage("openrouter") + + assert snapshot is not None + assert snapshot.windows == () + assert "Credits balance: $74.50" in snapshot.details + assert "API key usage: $25.50 total • $1.25 today • $4.50 this week • $18.00 this month" in snapshot.details From bcc5d7b67dd69b3708b43246f600745801ce6905 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 01:54:10 -0700 Subject: [PATCH 30/63] feat(/usage): append account limits section in CLI and gateway Wires the agent/account_usage module from the preceding commit into /usage so users see provider-side quota/credit info alongside the existing session token report. CLI: - `_show_usage` appends account lines under the token table. Fetch runs in a 1-worker ThreadPoolExecutor with a 10s timeout so a slow provider API can never hang the prompt. Gateway: - `_handle_usage_command` resolves provider from the live agent when available, else from the persisted billing_provider/billing_base_url on the SessionDB row, so /usage still returns account info between turns when no agent is resident. Fetch runs via asyncio.to_thread. - Account section is appended to all three return branches: running agent, no-agent-with-history, and the new no-agent-no-history path (falls back to account-only output instead of "no data"). Tests: - 2 new tests in tests/gateway/test_usage_command.py cover the live- agent account section and the persisted-billing fallback path. Salvaged from PR #2486 by @kshitijk4poor. The original branch had drifted ~2615 commits behind main and rewrote _show_usage wholesale, which would have dropped the rate-limit and cached-agent blocks added in PRs #6541 and #7038. This commit re-adds only the new behavior on top of current main. --- cli.py | 23 +++++++++ gateway/run.py | 56 ++++++++++++++++++--- tests/gateway/test_usage_command.py | 76 +++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 6 deletions(-) diff --git a/cli.py b/cli.py index 48af2c69fb..624139076d 100644 --- a/cli.py +++ b/cli.py @@ -19,6 +19,7 @@ import shutil import sys import json import re +import concurrent.futures import base64 import atexit import tempfile @@ -65,6 +66,7 @@ from agent.usage_pricing import ( format_duration_compact, format_token_count_compact, ) +from agent.account_usage import fetch_account_usage, render_account_usage_lines from hermes_cli.banner import _format_context_length, format_banner_version_label _COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏") @@ -7018,6 +7020,27 @@ class HermesCLI: if cost_result.status == "unknown": print(f" Note: Pricing unknown for {agent.model}") + # Account limits -- fetched off-thread with a hard timeout so slow + # provider APIs don't hang the prompt. + provider = getattr(agent, "provider", None) or getattr(self, "provider", None) + base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None) + api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None) + account_snapshot = None + if provider: + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool: + try: + account_snapshot = _pool.submit( + fetch_account_usage, provider, + base_url=base_url, api_key=api_key, + ).result(timeout=10.0) + except (concurrent.futures.TimeoutError, Exception): + account_snapshot = None + account_lines = [f" {line}" for line in render_account_usage_lines(account_snapshot)] + if account_lines: + print() + for line in account_lines: + print(line) + if self.verbose: logging.getLogger().setLevel(logging.DEBUG) for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'): diff --git a/gateway/run.py b/gateway/run.py index 0343790b04..c19303e61b 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -30,6 +30,8 @@ from pathlib import Path from datetime import datetime from typing import Dict, Optional, Any, List +from agent.account_usage import fetch_account_usage, render_account_usage_lines + # --- Agent cache tuning --------------------------------------------------- # Bounds the per-session AIAgent cache to prevent unbounded growth in # long-lived gateways (each AIAgent holds LLM clients, tool schemas, @@ -7262,6 +7264,38 @@ class GatewayRunner: if cached: agent = cached[0] + # Resolve provider/base_url/api_key for the account-usage fetch. + # Prefer the live agent; fall back to persisted billing data on the + # SessionDB row so `/usage` still returns account info between turns + # when no agent is resident. + provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None + if not provider and getattr(self, "_session_db", None) is not None: + try: + _entry_for_billing = self.session_store.get_or_create_session(source) + persisted = self._session_db.get_session(_entry_for_billing.session_id) or {} + except Exception: + persisted = {} + provider = provider or persisted.get("billing_provider") + base_url = base_url or persisted.get("billing_base_url") + + # Fetch account usage off the event loop so slow provider APIs don't + # block the gateway. Failures are non-fatal -- account_lines stays []. + account_lines: list[str] = [] + if provider: + try: + account_snapshot = await asyncio.to_thread( + fetch_account_usage, + provider, + base_url=base_url, + api_key=api_key, + ) + except Exception: + account_snapshot = None + if account_snapshot: + account_lines = render_account_usage_lines(account_snapshot, markdown=True) + if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0: lines = [] @@ -7319,6 +7353,10 @@ class GatewayRunner: if ctx.compression_count: lines.append(f"Compressions: {ctx.compression_count}") + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) # No agent at all -- check session history for a rough count @@ -7328,12 +7366,18 @@ class GatewayRunner: from agent.model_metadata import estimate_messages_tokens_rough msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")] approx = estimate_messages_tokens_rough(msgs) - return ( - f"📊 **Session Info**\n" - f"Messages: {len(msgs)}\n" - f"Estimated context: ~{approx:,} tokens\n" - f"_(Detailed usage available after the first agent response)_" - ) + lines = [ + "📊 **Session Info**", + f"Messages: {len(msgs)}", + f"Estimated context: ~{approx:,} tokens", + "_(Detailed usage available after the first agent response)_", + ] + if account_lines: + lines.append("") + lines.extend(account_lines) + return "\n".join(lines) + if account_lines: + return "\n".join(account_lines) return "No usage data available for this session." async def _handle_insights_command(self, event: MessageEvent) -> str: diff --git a/tests/gateway/test_usage_command.py b/tests/gateway/test_usage_command.py index 2915810891..feced75b25 100644 --- a/tests/gateway/test_usage_command.py +++ b/tests/gateway/test_usage_command.py @@ -175,3 +175,79 @@ class TestUsageCachedAgent: result = await runner._handle_usage_command(event) assert "Cost: included" in result + + +class TestUsageAccountSection: + """Account-limits section appended to /usage output (PR #2486).""" + + @pytest.mark.asyncio + async def test_usage_command_includes_account_section(self, monkeypatch): + agent = _make_mock_agent(provider="openai-codex") + agent.base_url = "https://chatgpt.com/backend-api/codex" + agent.api_key = "unused" + runner = _make_runner(SK, cached_agent=agent) + event = MagicMock() + + monkeypatch.setattr( + "gateway.run.fetch_account_usage", + lambda provider, base_url=None, api_key=None: object(), + ) + monkeypatch.setattr( + "gateway.run.render_account_usage_lines", + lambda snapshot, markdown=False: [ + "📈 **Account limits**", + "Provider: openai-codex (Pro)", + "Session: 85% remaining (15% used)", + ], + ) + with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \ + patch("agent.usage_pricing.estimate_usage_cost") as mock_cost: + mock_cost.return_value = MagicMock(amount_usd=None, status="included") + result = await runner._handle_usage_command(event) + + assert "📊 **Session Token Usage**" in result + assert "📈 **Account limits**" in result + assert "Provider: openai-codex (Pro)" in result + + @pytest.mark.asyncio + async def test_usage_command_uses_persisted_provider_when_agent_not_running(self, monkeypatch): + runner = _make_runner(SK) + runner._session_db = MagicMock() + runner._session_db.get_session.return_value = { + "billing_provider": "openai-codex", + "billing_base_url": "https://chatgpt.com/backend-api/codex", + } + session_entry = MagicMock() + session_entry.session_id = "sess-1" + runner.session_store.get_or_create_session.return_value = session_entry + runner.session_store.load_transcript.return_value = [ + {"role": "user", "content": "earlier"}, + ] + + calls = {} + + async def _fake_to_thread(fn, *args, **kwargs): + calls["args"] = args + calls["kwargs"] = kwargs + return fn(*args, **kwargs) + + monkeypatch.setattr("gateway.run.asyncio.to_thread", _fake_to_thread) + monkeypatch.setattr( + "gateway.run.fetch_account_usage", + lambda provider, base_url=None, api_key=None: object(), + ) + monkeypatch.setattr( + "gateway.run.render_account_usage_lines", + lambda snapshot, markdown=False: [ + "📈 **Account limits**", + "Provider: openai-codex (Pro)", + ], + ) + + event = MagicMock() + result = await runner._handle_usage_command(event) + + assert calls["args"] == ("openai-codex",) + assert calls["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex" + assert "📊 **Session Info**" in result + assert "📈 **Account limits**" in result From 4fea1769d2968a3c9ab2557c9839db0b85e2aba3 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 01:56:55 -0700 Subject: [PATCH 31/63] feat(opencode-go): add Kimi K2.6 and Qwen3.5/3.6 Plus to curated catalog (#13429) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenCode Go's published model list (opencode.ai/docs/go) includes kimi-k2.6, qwen3.5-plus, and qwen3.6-plus, but Hermes' curated lists didn't carry them. When the live /models probe fails during `hermes model`, users fell back to the stale curated list and had to type newer models via 'Enter custom model name'. Adds kimi-k2.6 (now first in the Go list), qwen3.6-plus, and qwen3.5-plus to both the model picker (hermes_cli/models.py) and setup defaults (hermes_cli/setup.py). All routed through the existing opencode-go chat_completions path — no api_mode changes needed. --- hermes_cli/models.py | 3 +++ hermes_cli/setup.py | 2 +- tests/hermes_cli/test_opencode_go_in_model_list.py | 2 +- 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/hermes_cli/models.py b/hermes_cli/models.py index e8772d246d..1e5abb97e9 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -292,6 +292,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "big-pickle", ], "opencode-go": [ + "kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", @@ -299,6 +300,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5", + "qwen3.6-plus", + "qwen3.5-plus", ], "kilocode": [ "anthropic/claude-opus-4.6", diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 7eb25965ae..d7eb7b734a 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -102,7 +102,7 @@ _DEFAULT_PROVIDER_MODELS = { "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], - "opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"], + "opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"], "huggingface": [ "Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507", "Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528", diff --git a/tests/hermes_cli/test_opencode_go_in_model_list.py b/tests/hermes_cli/test_opencode_go_in_model_list.py index a84701f09c..647ee2bee8 100644 --- a/tests/hermes_cli/test_opencode_go_in_model_list.py +++ b/tests/hermes_cli/test_opencode_go_in_model_list.py @@ -15,7 +15,7 @@ def test_opencode_go_appears_when_api_key_set(): opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None) assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set" - assert opencode_go["models"] == ["kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"] + assert opencode_go["models"] == ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"] # opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when # models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when # the API is unavailable, e.g. in CI). From 15abf4ed8fe311bfca6faf25e7548f2000f13471 Mon Sep 17 00:00:00 2001 From: teyrebaz33 Date: Sun, 22 Mar 2026 18:12:01 +0300 Subject: [PATCH 32/63] feat(patch): add 'did you mean?' feedback when patch fails to match When patch_replace() cannot find old_string in a file, the error message now includes the closest matching lines from the file with line numbers and context. This helps the LLM self-correct without a separate read_file call. Implements Phase 1 of #536: enhanced patch error feedback with no architectural changes. - tools/fuzzy_match.py: new find_closest_lines() using SequenceMatcher - tools/file_operations.py: attach closest-lines hint to patch errors - tests/tools/test_fuzzy_match.py: 5 new tests for find_closest_lines --- tests/tools/test_fuzzy_match.py | 32 +++++++++++++++++ tools/file_operations.py | 16 +++++---- tools/fuzzy_match.py | 62 +++++++++++++++++++++++++++++++++ 3 files changed, 104 insertions(+), 6 deletions(-) diff --git a/tests/tools/test_fuzzy_match.py b/tests/tools/test_fuzzy_match.py index 7a03065f4e..9db45b7a5e 100644 --- a/tests/tools/test_fuzzy_match.py +++ b/tests/tools/test_fuzzy_match.py @@ -230,3 +230,35 @@ class TestEscapeDriftGuard: new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string) assert err is None assert count == 1 + + +class TestFindClosestLines: + def setup_method(self): + from tools.fuzzy_match import find_closest_lines + self.find_closest_lines = find_closest_lines + + def test_finds_similar_line(self): + content = "def foo():\n pass\ndef bar():\n return 1\n" + result = self.find_closest_lines("def baz():", content) + assert "def foo" in result or "def bar" in result + + def test_returns_empty_for_no_match(self): + content = "completely different content here" + result = self.find_closest_lines("xyzzy_no_match_possible_!!!", content) + assert result == "" + + def test_returns_empty_for_empty_inputs(self): + assert self.find_closest_lines("", "some content") == "" + assert self.find_closest_lines("old string", "") == "" + + def test_includes_context_lines(self): + content = "line1\nline2\ndef target():\n pass\nline5\n" + result = self.find_closest_lines("def target():", content) + assert "target" in result + + def test_includes_line_numbers(self): + content = "line1\nline2\ndef foo():\n pass\n" + result = self.find_closest_lines("def foo():", content) + # Should include line numbers in format "N| content" + assert "|" in result + diff --git a/tools/file_operations.py b/tools/file_operations.py index 59070d7ce0..c9b5d3d644 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -738,12 +738,16 @@ class ShellFileOperations(FileOperations): content, old_string, new_string, replace_all ) - if error: - return PatchResult(error=error) - - if match_count == 0: - return PatchResult(error=f"Could not find match for old_string in {path}") - + if error or match_count == 0: + err_msg = error or f"Could not find match for old_string in {path}" + try: + from tools.fuzzy_match import find_closest_lines + hint = find_closest_lines(old_string, content) + if hint: + err_msg += "\n\nDid you mean one of these sections?\n" + hint + except Exception: + pass + return PatchResult(error=err_msg) # Write back write_result = self.write_file(path, new_content) if write_result.error: diff --git a/tools/fuzzy_match.py b/tools/fuzzy_match.py index a9dc4272ef..301794644e 100644 --- a/tools/fuzzy_match.py +++ b/tools/fuzzy_match.py @@ -619,3 +619,65 @@ def _map_normalized_positions(original: str, normalized: str, original_matches.append((orig_start, min(orig_end, len(original)))) return original_matches + + +def find_closest_lines(old_string: str, content: str, context_lines: int = 2, max_results: int = 3) -> str: + """Find lines in content most similar to old_string for "did you mean?" feedback. + + Returns a formatted string showing the closest matching lines with context, + or empty string if no useful match is found. + """ + if not old_string or not content: + return "" + + old_lines = old_string.splitlines() + content_lines = content.splitlines() + + if not old_lines or not content_lines: + return "" + + # Use first line of old_string as anchor for search + anchor = old_lines[0].strip() + if not anchor: + # Try second line if first is blank + candidates = [l.strip() for l in old_lines if l.strip()] + if not candidates: + return "" + anchor = candidates[0] + + # Score each line in content by similarity to anchor + scored = [] + for i, line in enumerate(content_lines): + stripped = line.strip() + if not stripped: + continue + ratio = SequenceMatcher(None, anchor, stripped).ratio() + if ratio > 0.3: + scored.append((ratio, i)) + + if not scored: + return "" + + # Take top matches + scored.sort(key=lambda x: -x[0]) + top = scored[:max_results] + + parts = [] + seen_ranges = set() + for _, line_idx in top: + start = max(0, line_idx - context_lines) + end = min(len(content_lines), line_idx + len(old_lines) + context_lines) + key = (start, end) + if key in seen_ranges: + continue + seen_ranges.add(key) + snippet = "\n".join( + f"{start + j + 1:4d}| {content_lines[start + j]}" + for j in range(end - start) + ) + parts.append(snippet) + + if not parts: + return "" + + return "\n---\n".join(parts) From 5e6427a42c75477cc01782328b8c47dad3240667 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 01:59:58 -0700 Subject: [PATCH 33/63] fix(patch): gate 'did you mean?' to no-match + extend to v4a/skill_manage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-ups on top of @teyrebaz33's cherry-picked commit: 1. New shared helper format_no_match_hint() in fuzzy_match.py with a startswith('Could not find') gate so the snippet only appends to genuine no-match errors — not to 'Found N matches' (ambiguous), 'Escape-drift detected', or 'identical strings' errors, which would all mislead the model. 2. file_tools.patch_tool suppresses the legacy generic '[Hint: old_string not found...]' string when the rich 'Did you mean?' snippet is already attached — no more double-hint. 3. Wire the same helper into patch_parser.py (V4A patch mode, both _validate_operations and _apply_update) and skill_manager_tool.py so all three fuzzy callers surface the hint consistently. Tests: 7 new gating tests in TestFormatNoMatchHint cover every error class (ambiguous, drift, identical, non-zero match count, None error, no similar content, happy path). 34/34 test_fuzzy_match, 96/96 test_file_tools + test_patch_parser + test_skill_manager_tool pass. E2E verified across all four scenarios: no-match-with-similar, no-match-no-similar, ambiguous, success. V4A mode confirmed end-to-end with a non-matching hunk. --- tests/tools/test_fuzzy_match.py | 67 +++++++++++++++++++++++++++++++++ tools/file_operations.py | 6 +-- tools/file_tools.py | 5 ++- tools/fuzzy_match.py | 21 +++++++++++ tools/patch_parser.py | 16 +++++++- tools/skill_manager_tool.py | 8 +++- 6 files changed, 115 insertions(+), 8 deletions(-) diff --git a/tests/tools/test_fuzzy_match.py b/tests/tools/test_fuzzy_match.py index 9db45b7a5e..3f7d315820 100644 --- a/tests/tools/test_fuzzy_match.py +++ b/tests/tools/test_fuzzy_match.py @@ -262,3 +262,70 @@ class TestFindClosestLines: # Should include line numbers in format "N| content" assert "|" in result + +class TestFormatNoMatchHint: + """Gating tests for format_no_match_hint — the shared helper that decides + whether a 'Did you mean?' snippet should be appended to an error. + """ + + def setup_method(self): + from tools.fuzzy_match import format_no_match_hint + self.fmt = format_no_match_hint + + def test_fires_on_could_not_find_with_match(self): + """Classic no-match: similar content exists → hint fires.""" + content = "def foo():\n pass\ndef bar():\n pass\n" + result = self.fmt( + "Could not find a match for old_string in the file", + 0, "def baz():", content, + ) + assert "Did you mean" in result + assert "foo" in result or "bar" in result + + def test_silent_on_ambiguous_match_error(self): + """'Found N matches' is not a missing-match failure — no hint.""" + content = "aaa bbb aaa\n" + result = self.fmt( + "Found 2 matches for old_string. Provide more context to make it unique, or use replace_all=True.", + 0, "aaa", content, + ) + assert result == "" + + def test_silent_on_escape_drift_error(self): + """Escape-drift errors are intentional blocks — hint would mislead.""" + content = "x = 1\n" + result = self.fmt( + "Escape-drift detected: old_string and new_string contain the literal sequence '\\\\''...", + 0, "x = \\'1\\'", content, + ) + assert result == "" + + def test_silent_on_identical_strings(self): + """old_string == new_string — hint irrelevant.""" + result = self.fmt( + "old_string and new_string are identical", + 0, "foo", "foo bar\n", + ) + assert result == "" + + def test_silent_when_match_count_nonzero(self): + """If match succeeded, we shouldn't be in the error path — defense in depth.""" + result = self.fmt( + "Could not find a match for old_string in the file", + 1, "foo", "foo bar\n", + ) + assert result == "" + + def test_silent_on_none_error(self): + """No error at all — no hint.""" + result = self.fmt(None, 0, "foo", "bar\n") + assert result == "" + + def test_silent_when_no_similar_content(self): + """Even for a valid no-match error, skip hint when nothing similar exists.""" + result = self.fmt( + "Could not find a match for old_string in the file", + 0, "totally_unique_xyzzy_qux", "abc\nxyz\n", + ) + assert result == "" + diff --git a/tools/file_operations.py b/tools/file_operations.py index c9b5d3d644..87ad139689 100644 --- a/tools/file_operations.py +++ b/tools/file_operations.py @@ -741,10 +741,8 @@ class ShellFileOperations(FileOperations): if error or match_count == 0: err_msg = error or f"Could not find match for old_string in {path}" try: - from tools.fuzzy_match import find_closest_lines - hint = find_closest_lines(old_string, content) - if hint: - err_msg += "\n\nDid you mean one of these sections?\n" + hint + from tools.fuzzy_match import format_no_match_hint + err_msg += format_no_match_hint(err_msg, match_count, old_string, content) except Exception: pass return PatchResult(error=err_msg) diff --git a/tools/file_tools.py b/tools/file_tools.py index af6701f823..5b44ff03d3 100644 --- a/tools/file_tools.py +++ b/tools/file_tools.py @@ -670,8 +670,11 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None, result_json = json.dumps(result_dict, ensure_ascii=False) # Hint when old_string not found — saves iterations where the agent # retries with stale content instead of re-reading the file. + # Suppressed when patch_replace already attached a rich "Did you mean?" + # snippet (which is strictly more useful than the generic hint). if result_dict.get("error") and "Could not find" in str(result_dict["error"]): - result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]" + if "Did you mean one of these sections?" not in str(result_dict["error"]): + result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]" return result_json except Exception as e: return tool_error(str(e)) diff --git a/tools/fuzzy_match.py b/tools/fuzzy_match.py index 301794644e..9a922cd9b3 100644 --- a/tools/fuzzy_match.py +++ b/tools/fuzzy_match.py @@ -681,3 +681,24 @@ def find_closest_lines(old_string: str, content: str, context_lines: int = 2, ma return "" return "\n---\n".join(parts) + + +def format_no_match_hint(error: Optional[str], match_count: int, + old_string: str, content: str) -> str: + """Return a '\\n\\nDid you mean...' snippet for plain no-match errors. + + Gated so the hint only fires for actual "old_string not found" failures. + Ambiguous-match ("Found N matches"), escape-drift, and identical-strings + errors all have ``match_count == 0`` but a "did you mean?" snippet would + be misleading — those failed for unrelated reasons. + + Returns an empty string when there's nothing useful to append. + """ + if match_count != 0: + return "" + if not error or not error.startswith("Could not find"): + return "" + hint = find_closest_lines(old_string, content) + if not hint: + return "" + return "\n\nDid you mean one of these sections?\n" + hint diff --git a/tools/patch_parser.py b/tools/patch_parser.py index 0c961083c2..d2a298fc9f 100644 --- a/tools/patch_parser.py +++ b/tools/patch_parser.py @@ -290,10 +290,16 @@ def _validate_operations( ) if count == 0: label = f"'{hunk.context_hint}'" if hunk.context_hint else "(no hint)" - errors.append( + msg = ( f"{op.file_path}: hunk {label} not found" + (f" — {match_error}" if match_error else "") ) + try: + from tools.fuzzy_match import format_no_match_hint + msg += format_no_match_hint(match_error, count, search_pattern, simulated) + except Exception: + pass + errors.append(msg) else: # Advance simulation so subsequent hunks validate correctly. # Reuse the result from the call above — no second fuzzy run. @@ -537,7 +543,13 @@ def _apply_update(op: PatchOperation, file_ops: Any) -> Tuple[bool, str]: error = None if error: - return False, f"Could not apply hunk: {error}" + err_msg = f"Could not apply hunk: {error}" + try: + from tools.fuzzy_match import format_no_match_hint + err_msg += format_no_match_hint(error, 0, search_pattern, new_content) + except Exception: + pass + return False, err_msg else: # Addition-only hunk (no context or removed lines). # Insert at the location indicated by the context hint, or at end of file. diff --git a/tools/skill_manager_tool.py b/tools/skill_manager_tool.py index 33d3976ea8..493b434c51 100644 --- a/tools/skill_manager_tool.py +++ b/tools/skill_manager_tool.py @@ -449,9 +449,15 @@ def _patch_skill( if match_error: # Show a short preview of the file so the model can self-correct preview = content[:500] + ("..." if len(content) > 500 else "") + err_msg = match_error + try: + from tools.fuzzy_match import format_no_match_hint + err_msg += format_no_match_hint(match_error, match_count, old_string, content) + except Exception: + pass return { "success": False, - "error": match_error, + "error": err_msg, "file_preview": preview, } From 77061ac99541b4e31ac7a93ff3bf7764402bc82c Mon Sep 17 00:00:00 2001 From: JackTheGit Date: Tue, 21 Apr 2026 01:56:47 -0700 Subject: [PATCH 34/63] Normalize FAL_KEY env handling (ignore whitespace-only values) Treat whitespace-only FAL_KEY the same as unset so users who export FAL_KEY=" " (or CI that leaves a blank token) get the expected 'not set' error path instead of a confusing downstream fal_client failure. Applied to the two direct FAL_KEY checks in image_generation_tool.py: image_generate_tool's upfront credential check and check_fal_api_key(). Both keep the existing managed-gateway fallback intact. Adapted the original whitespace/valid tests to pin the managed gateway to None so the whitespace assertion exercises the direct-key path rather than silently relying on gateway absence. --- tests/tools/test_image_generation_env.py | 39 ++++++++++++++++++++++++ tools/image_generation_tool.py | 8 +++-- 2 files changed, 45 insertions(+), 2 deletions(-) create mode 100644 tests/tools/test_image_generation_env.py diff --git a/tests/tools/test_image_generation_env.py b/tests/tools/test_image_generation_env.py new file mode 100644 index 0000000000..fc4e655334 --- /dev/null +++ b/tests/tools/test_image_generation_env.py @@ -0,0 +1,39 @@ +"""FAL_KEY env var normalization (whitespace-only treated as unset).""" + + +def test_fal_key_whitespace_is_unset(monkeypatch): + # Whitespace-only FAL_KEY must NOT register as configured, and the managed + # gateway fallback must be disabled for this assertion to be meaningful. + monkeypatch.setenv("FAL_KEY", " ") + + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "_resolve_managed_fal_gateway", lambda: None + ) + + assert image_generation_tool.check_fal_api_key() is False + + +def test_fal_key_valid(monkeypatch): + monkeypatch.setenv("FAL_KEY", "sk-test") + + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "_resolve_managed_fal_gateway", lambda: None + ) + + assert image_generation_tool.check_fal_api_key() is True + + +def test_fal_key_empty_is_unset(monkeypatch): + monkeypatch.setenv("FAL_KEY", "") + + from tools import image_generation_tool + + monkeypatch.setattr( + image_generation_tool, "_resolve_managed_fal_gateway", lambda: None + ) + + assert image_generation_tool.check_fal_api_key() is False diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index 13f17abe30..e10b8453cc 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -623,7 +623,9 @@ def image_generate_tool( if not prompt or not isinstance(prompt, str) or len(prompt.strip()) == 0: raise ValueError("Prompt is required and must be a non-empty string") - if not (os.getenv("FAL_KEY") or _resolve_managed_fal_gateway()): + fal_key_value = os.getenv("FAL_KEY") + fal_key_set = bool(fal_key_value and fal_key_value.strip()) + if not (fal_key_set or _resolve_managed_fal_gateway()): message = "FAL_KEY environment variable not set" if managed_nous_tools_enabled(): message += " and managed FAL gateway is unavailable" @@ -734,7 +736,9 @@ def image_generate_tool( def check_fal_api_key() -> bool: """True if the FAL.ai API key (direct or managed gateway) is available.""" - return bool(os.getenv("FAL_KEY") or _resolve_managed_fal_gateway()) + fal_key_value = os.getenv("FAL_KEY") + fal_key_set = bool(fal_key_value and fal_key_value.strip()) + return bool(fal_key_set or _resolve_managed_fal_gateway()) def check_image_generation_requirements() -> bool: From 2e722ee29ae2acebe2051b35303eb4a29f7cfcfc Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 01:59:15 -0700 Subject: [PATCH 35/63] fix(fal): extend whitespace-only FAL_KEY handling to all call sites Follow-up to PR #2504. The original fix covered the two direct FAL_KEY checks in image_generation_tool but left four other call sites intact, including the managed-gateway gate where a whitespace-only FAL_KEY falsely claimed 'user has direct FAL' and *skipped* the Nous managed gateway fallback entirely. Introduce fal_key_is_configured() in tools/tool_backend_helpers.py as a single source of truth (consults os.environ, falls back to .env for CLI-setup paths) and route every FAL_KEY presence check through it: - tools/image_generation_tool.py : _resolve_managed_fal_gateway, image_generate_tool's upfront check, check_fal_api_key - hermes_cli/nous_subscription.py : direct_fal detection, selected toolset gating, tools_ready map - hermes_cli/tools_config.py : image_gen needs-setup check Verified by extending tests/tools/test_image_generation_env.py and by E2E exercising whitespace + managed-gateway composition directly. --- hermes_cli/nous_subscription.py | 7 ++++--- hermes_cli/tools_config.py | 4 ++-- tools/image_generation_tool.py | 16 ++++++++-------- tools/tool_backend_helpers.py | 21 +++++++++++++++++++++ 4 files changed, 35 insertions(+), 13 deletions(-) diff --git a/hermes_cli/nous_subscription.py b/hermes_cli/nous_subscription.py index a4883b056b..78181aab2b 100644 --- a/hermes_cli/nous_subscription.py +++ b/hermes_cli/nous_subscription.py @@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status from hermes_cli.config import get_env_value, load_config from tools.managed_tool_gateway import is_managed_tool_gateway_ready from tools.tool_backend_helpers import ( + fal_key_is_configured, has_direct_modal_credentials, managed_nous_tools_enabled, normalize_browser_cloud_provider, @@ -271,7 +272,7 @@ def get_nous_subscription_features( direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL")) direct_parallel = bool(get_env_value("PARALLEL_API_KEY")) direct_tavily = bool(get_env_value("TAVILY_API_KEY")) - direct_fal = bool(get_env_value("FAL_KEY")) + direct_fal = fal_key_is_configured() direct_openai_tts = bool(resolve_openai_audio_api_key()) direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY")) direct_camofox = bool(get_env_value("CAMOFOX_URL")) @@ -520,7 +521,7 @@ def apply_nous_managed_defaults( browser_cfg["cloud_provider"] = "browser-use" changed.add("browser") - if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"): + if "image_gen" in selected_toolsets and not fal_key_is_configured(): changed.add("image_gen") return changed @@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]: or get_env_value("TAVILY_API_KEY") or get_env_value("EXA_API_KEY") ), - "image_gen": bool(get_env_value("FAL_KEY")), + "image_gen": fal_key_is_configured(), "tts": bool( resolve_openai_audio_api_key() or get_env_value("ELEVENLABS_API_KEY") diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index 24c5fde5fb..36b3c7f3f3 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -24,7 +24,7 @@ from hermes_cli.nous_subscription import ( apply_nous_managed_defaults, get_nous_subscription_features, ) -from tools.tool_backend_helpers import managed_nous_tools_enabled +from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled from utils import base_url_hostname logger = logging.getLogger(__name__) @@ -876,7 +876,7 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool: browser_cfg = config.get("browser", {}) return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg if ts_key == "image_gen": - return not get_env_value("FAL_KEY") + return not fal_key_is_configured() return not _toolset_has_keys(ts_key, config) diff --git a/tools/image_generation_tool.py b/tools/image_generation_tool.py index e10b8453cc..13e95ef2dd 100644 --- a/tools/image_generation_tool.py +++ b/tools/image_generation_tool.py @@ -33,7 +33,11 @@ import fal_client from tools.debug_helpers import DebugSession from tools.managed_tool_gateway import resolve_managed_tool_gateway -from tools.tool_backend_helpers import managed_nous_tools_enabled, prefers_gateway +from tools.tool_backend_helpers import ( + fal_key_is_configured, + managed_nous_tools_enabled, + prefers_gateway, +) logger = logging.getLogger(__name__) @@ -286,7 +290,7 @@ _managed_fal_client_lock = threading.Lock() def _resolve_managed_fal_gateway(): """Return managed fal-queue gateway config when the user prefers the gateway or direct FAL credentials are absent.""" - if os.getenv("FAL_KEY") and not prefers_gateway("image_gen"): + if fal_key_is_configured() and not prefers_gateway("image_gen"): return None return resolve_managed_tool_gateway("fal-queue") @@ -623,9 +627,7 @@ def image_generate_tool( if not prompt or not isinstance(prompt, str) or len(prompt.strip()) == 0: raise ValueError("Prompt is required and must be a non-empty string") - fal_key_value = os.getenv("FAL_KEY") - fal_key_set = bool(fal_key_value and fal_key_value.strip()) - if not (fal_key_set or _resolve_managed_fal_gateway()): + if not (fal_key_is_configured() or _resolve_managed_fal_gateway()): message = "FAL_KEY environment variable not set" if managed_nous_tools_enabled(): message += " and managed FAL gateway is unavailable" @@ -736,9 +738,7 @@ def image_generate_tool( def check_fal_api_key() -> bool: """True if the FAL.ai API key (direct or managed gateway) is available.""" - fal_key_value = os.getenv("FAL_KEY") - fal_key_set = bool(fal_key_value and fal_key_value.strip()) - return bool(fal_key_set or _resolve_managed_fal_gateway()) + return bool(fal_key_is_configured() or _resolve_managed_fal_gateway()) def check_image_generation_requirements() -> bool: diff --git a/tools/tool_backend_helpers.py b/tools/tool_backend_helpers.py index a770fe7470..810a51c63d 100644 --- a/tools/tool_backend_helpers.py +++ b/tools/tool_backend_helpers.py @@ -119,3 +119,24 @@ def prefers_gateway(config_section: str) -> bool: except Exception: pass return False + + +def fal_key_is_configured() -> bool: + """Return True when FAL_KEY is set to a non-whitespace value. + + Consults both ``os.environ`` and ``~/.hermes/.env`` (via + ``hermes_cli.config.get_env_value`` when available) so tool-side + checks and CLI setup-time checks agree. A whitespace-only value + is treated as unset everywhere. + """ + value = os.getenv("FAL_KEY") + if value is None: + # Fall back to the .env file for CLI paths that may run before + # dotenv is loaded into os.environ. + try: + from hermes_cli.config import get_env_value + + value = get_env_value("FAL_KEY") + except Exception: + value = None + return bool(value and value.strip()) From b0939d92109e6de0c42d3f7916de720e9d3c1b66 Mon Sep 17 00:00:00 2001 From: pinion05 Date: Wed, 8 Apr 2026 14:13:06 +0900 Subject: [PATCH 36/63] fix: slash commands now respect require_mention in Telegram groups When require_mention is enabled, slash commands no longer bypass mention checks. Bare /command without @mention is filtered in groups, while /command@botname (bot menu) and @botname /command still pass. Commands still pass unconditionally when require_mention is disabled, preserving backward compatibility. Closes #6033 --- gateway/platforms/telegram.py | 10 +++++++--- tests/gateway/test_telegram_group_gating.py | 8 +++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index cfad233e68..156251e54c 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -2333,10 +2333,16 @@ class TelegramAdapter(BasePlatformAdapter): DMs remain unrestricted. Group/supergroup messages are accepted when: - the chat is explicitly allowlisted in ``free_response_chats`` - ``require_mention`` is disabled - - the message is a command - the message replies to the bot - the bot is @mentioned - the text/caption matches a configured regex wake-word pattern + + When ``require_mention`` is enabled, slash commands are not given + special treatment — they must pass the same mention/reply checks + as any other group message. Users can still trigger commands via + the Telegram bot menu (``/command@botname``) or by explicitly + mentioning the bot (``@botname /command``), both of which are + recognised as mentions by :meth:`_message_mentions_bot`. """ if not self._is_group_chat(message): return True @@ -2351,8 +2357,6 @@ class TelegramAdapter(BasePlatformAdapter): return True if not self._telegram_require_mention(): return True - if is_command: - return True if self._is_reply_to_bot(message): return True if self._message_mentions_bot(message): diff --git a/tests/gateway/test_telegram_group_gating.py b/tests/gateway/test_telegram_group_gating.py index 15ffca9ec3..82a19adf97 100644 --- a/tests/gateway/test_telegram_group_gating.py +++ b/tests/gateway/test_telegram_group_gating.py @@ -71,7 +71,13 @@ def test_group_messages_can_require_direct_trigger_via_config(): assert adapter._should_process_message(_group_message("hello everyone")) is False assert adapter._should_process_message(_group_message("hi @hermes_bot", entities=[_mention_entity("hi @hermes_bot")])) is True assert adapter._should_process_message(_group_message("replying", reply_to_bot=True)) is True - assert adapter._should_process_message(_group_message("/status"), is_command=True) is True + # Commands must also respect require_mention when it is enabled + assert adapter._should_process_message(_group_message("/status"), is_command=True) is False + # But commands with @mention still pass + assert adapter._should_process_message(_group_message("/status@hermes_bot")) is True + # And commands still pass unconditionally when require_mention is disabled + adapter_no_mention = _make_adapter(require_mention=False) + assert adapter_no_mention._should_process_message(_group_message("/status"), is_command=True) is True def test_free_response_chats_bypass_mention_requirement(): From c1fe6339b7f3e5c362af6803d0695b8eb60dfaff Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 02:06:45 -0700 Subject: [PATCH 37/63] test(telegram): update /cmd@botname assertion for entity-only detection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current main's _message_mentions_bot() uses MessageEntity-only detection (commit e330112a), so the test for '/status@hermes_bot' needs to include a MENTION entity. Real Telegram always emits one for /cmd@botname — the bot menu and CommandHandler rely on this mechanism. --- tests/gateway/test_telegram_group_gating.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/gateway/test_telegram_group_gating.py b/tests/gateway/test_telegram_group_gating.py index 82a19adf97..0381cf6f46 100644 --- a/tests/gateway/test_telegram_group_gating.py +++ b/tests/gateway/test_telegram_group_gating.py @@ -73,8 +73,12 @@ def test_group_messages_can_require_direct_trigger_via_config(): assert adapter._should_process_message(_group_message("replying", reply_to_bot=True)) is True # Commands must also respect require_mention when it is enabled assert adapter._should_process_message(_group_message("/status"), is_command=True) is False - # But commands with @mention still pass - assert adapter._should_process_message(_group_message("/status@hermes_bot")) is True + # But commands with @mention still pass (Telegram emits a MENTION entity + # for /cmd@botname — the bot menu and python-telegram-bot's CommandHandler + # rely on this same mechanism) + assert adapter._should_process_message( + _group_message("/status@hermes_bot", entities=[_mention_entity("/status@hermes_bot")]) + ) is True # And commands still pass unconditionally when require_mention is disabled adapter_no_mention = _make_adapter(require_mention=False) assert adapter_no_mention._should_process_message(_group_message("/status"), is_command=True) is True From 554db8e6cf80fd6654d5089d2cbb392ced3fc209 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 02:06:45 -0700 Subject: [PATCH 38/63] chore(release): add pinion05 to AUTHOR_MAP --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index c8ceed0867..e36b41d032 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -327,6 +327,7 @@ AUTHOR_MAP = { "aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80", "zheng.jerilyn@gmail.com": "jerilynzheng", "asslaenn5@gmail.com": "Aslaaen", + "shalompmc0505@naver.com": "pinion05", } From d1cfe53d857dcfc94dffd9adbb38d7020c26747d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 03:09:10 -0700 Subject: [PATCH 39/63] docs(xurl skill): document UsernameNotFound workaround (xurl v1.1.0) (#13458) xurl v1.1.0 added an optional USERNAME positional to `xurl auth oauth2` that skips the `/2/users/me` lookup, which has been returning 403/UsernameNotFound for many devs. Documents the workaround in both setup (step 5) and troubleshooting. Reported by @itechnologynet. --- skills/social-media/xurl/SKILL.md | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/skills/social-media/xurl/SKILL.md b/skills/social-media/xurl/SKILL.md index 3ce1908084..1f47b2e6a0 100644 --- a/skills/social-media/xurl/SKILL.md +++ b/skills/social-media/xurl/SKILL.md @@ -1,7 +1,7 @@ --- name: xurl description: Interact with X/Twitter via xurl, the official X API CLI. Use for posting, replying, quoting, searching, timelines, mentions, likes, reposts, bookmarks, follows, DMs, media upload, and raw v2 endpoint access. -version: 1.1.0 +version: 1.1.1 author: xdevplatform + openclaw + Hermes Agent license: MIT platforms: [linux, macos] @@ -95,6 +95,12 @@ These steps must be performed by the user directly, NOT by the agent, because th xurl auth oauth2 --app my-app ``` (This opens a browser for the OAuth 2.0 PKCE flow.) + + If X returns a `UsernameNotFound` error or 403 on the post-OAuth `/2/users/me` lookup, pass your handle explicitly (xurl v1.1.0+): + ```bash + xurl auth oauth2 --app my-app YOUR_USERNAME + ``` + This binds the token to your handle and skips the broken `/2/users/me` call. 6. Set the app as default so all commands use it: ```bash xurl auth default my-app @@ -380,6 +386,7 @@ xurl --app staging /2/users/me # one-off against staging | --- | --- | --- | | Auth errors after successful OAuth flow | Token saved to `default` app (no client-id/secret) instead of your named app | `xurl auth oauth2 --app my-app` then `xurl auth default my-app` | | `unauthorized_client` during OAuth | App type set to "Native App" in X dashboard | Change to "Web app, automated app or bot" in User Authentication Settings | +| `UsernameNotFound` or 403 on `/2/users/me` right after OAuth | X not returning username reliably from `/2/users/me` | Re-run `xurl auth oauth2 --app my-app YOUR_USERNAME` (xurl v1.1.0+) to pass the handle explicitly | | 401 on every request | Token expired or wrong default app | Check `xurl auth status` — verify `▸` points to an app with oauth2 tokens | | `client-forbidden` / `client-not-enrolled` | X platform enrollment issue | Dashboard → Apps → Manage → Move to "Pay-per-use" package → Production environment | | `CreditsDepleted` | $0 balance on X API | Buy credits (min $5) in Developer Console → Billing | From c6974043eff246274a8079466a3a4d22ab13ee6c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 03:39:55 -0700 Subject: [PATCH 40/63] refactor(acp): validate method_id against advertised provider in authenticate() (#13468) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat(models): hide OpenRouter models that don't advertise tool support Port from Kilo-Org/kilocode#9068. hermes-agent is tool-calling-first — every provider path assumes the model can invoke tools. Models whose OpenRouter supported_parameters doesn't include 'tools' (e.g. image-only or completion-only models) cannot be driven by the agent loop and fail at the first tool call. Filter them out of fetch_openrouter_models() so they never appear in the model picker (`hermes model`, setup wizard, /model slash command). Permissive when the field is missing — OpenRouter-compatible gateways (Nous Portal, private mirrors, older snapshots) don't always populate supported_parameters. Treat missing as 'unknown → allow' rather than silently emptying the picker on those gateways. Only hide models whose supported_parameters is an explicit list that omits tools. Tests cover: tools present → kept, tools absent → dropped, field missing → kept, malformed non-list → kept, non-dict item → kept, empty list → dropped. * refactor(acp): validate method_id against advertised provider in authenticate() Previously authenticate() accepted any method_id whenever the server had provider credentials configured. This was not a vulnerability under the personal-assistant trust model (ACP is stdio-only, local-trust — anything that can reach the transport is already code-execution-equivalent to the user), but it was sloppy API hygiene: the advertised auth_methods list from initialize() was effectively ignored. Now authenticate() only returns AuthenticateResponse when method_id matches the currently-advertised provider (case-insensitive). Mismatched or missing method_id returns None, consistent with the no-credentials case. Raised by xeloxa via GHSA-g5pf-8w9m-h72x. Declined as a CVE (ACP transport is stdio, local-trust model), but the correctness fix is worth having on its own. --- acp_adapter/server.py | 17 ++++- hermes_cli/models.py | 30 ++++++++ tests/acp/test_server.py | 28 +++++-- tests/hermes_cli/test_models.py | 125 ++++++++++++++++++++++++++++++++ 4 files changed, 191 insertions(+), 9 deletions(-) diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 119a08685a..aa886cfbdc 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -51,7 +51,7 @@ try: except ImportError: from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined] -from acp_adapter.auth import detect_provider, has_provider +from acp_adapter.auth import detect_provider from acp_adapter.events import ( make_message_cb, make_step_cb, @@ -351,9 +351,18 @@ class HermesACPAgent(acp.Agent): ) async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None: - if has_provider(): - return AuthenticateResponse() - return None + # Only accept authenticate() calls whose method_id matches the + # provider we advertised in initialize(). Without this check, + # authenticate() would acknowledge any method_id as long as the + # server has provider credentials configured — harmless under + # Hermes' threat model (ACP is stdio-only, local-trust), but poor + # API hygiene and confusing if ACP ever grows multi-method auth. + provider = detect_provider() + if not provider: + return None + if not isinstance(method_id, str) or method_id.strip().lower() != provider: + return None + return AuthenticateResponse() # ---- Session management ------------------------------------------------- diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 1e5abb97e9..ae54217952 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -688,6 +688,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool: return False +def _openrouter_model_supports_tools(item: Any) -> bool: + """Return True when the model's ``supported_parameters`` advertise tool calling. + + hermes-agent is tool-calling-first — every provider path assumes the model + can invoke tools. Models that don't advertise ``tools`` in their + ``supported_parameters`` (e.g. image-only or completion-only models) cannot + be driven by the agent loop and would fail at the first tool call. + + **Permissive when the field is missing.** Some OpenRouter-compatible gateways + (Nous Portal, private mirrors, older catalog snapshots) don't populate + ``supported_parameters`` at all. Treat that as "unknown capability → allow" + so the picker doesn't silently empty for those users. Only hide models + whose ``supported_parameters`` is an explicit list that omits ``tools``. + + Ported from Kilo-Org/kilocode#9068. + """ + if not isinstance(item, dict): + return True + params = item.get("supported_parameters") + if not isinstance(params, list): + # Field absent / malformed / None — be permissive. + return True + return "tools" in params + + def fetch_openrouter_models( timeout: float = 8.0, *, @@ -730,6 +755,11 @@ def fetch_openrouter_models( live_item = live_by_id.get(preferred_id) if live_item is None: continue + # Hide models that don't advertise tool-calling support — hermes-agent + # requires it and surfacing them leads to immediate runtime failures + # when the user selects them. Ported from Kilo-Org/kilocode#9068. + if not _openrouter_model_supports_tools(live_item): + continue desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else "" curated.append((preferred_id, desc)) diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py index 5893d79071..61db3f9fbe 100644 --- a/tests/acp/test_server.py +++ b/tests/acp/test_server.py @@ -95,19 +95,37 @@ class TestInitialize: class TestAuthenticate: @pytest.mark.asyncio - async def test_authenticate_with_provider_configured(self, agent, monkeypatch): + async def test_authenticate_with_matching_method_id(self, agent, monkeypatch): monkeypatch.setattr( - "acp_adapter.server.has_provider", - lambda: True, + "acp_adapter.server.detect_provider", + lambda: "openrouter", ) resp = await agent.authenticate(method_id="openrouter") assert isinstance(resp, AuthenticateResponse) + @pytest.mark.asyncio + async def test_authenticate_is_case_insensitive(self, agent, monkeypatch): + monkeypatch.setattr( + "acp_adapter.server.detect_provider", + lambda: "openrouter", + ) + resp = await agent.authenticate(method_id="OpenRouter") + assert isinstance(resp, AuthenticateResponse) + + @pytest.mark.asyncio + async def test_authenticate_rejects_mismatched_method_id(self, agent, monkeypatch): + monkeypatch.setattr( + "acp_adapter.server.detect_provider", + lambda: "openrouter", + ) + resp = await agent.authenticate(method_id="totally-invalid-method") + assert resp is None + @pytest.mark.asyncio async def test_authenticate_without_provider(self, agent, monkeypatch): monkeypatch.setattr( - "acp_adapter.server.has_provider", - lambda: False, + "acp_adapter.server.detect_provider", + lambda: None, ) resp = await agent.authenticate(method_id="openrouter") assert resp is None diff --git a/tests/hermes_cli/test_models.py b/tests/hermes_cli/test_models.py index fc86caeeb5..ea2f3057f4 100644 --- a/tests/hermes_cli/test_models.py +++ b/tests/hermes_cli/test_models.py @@ -88,6 +88,131 @@ class TestFetchOpenRouterModels: assert models == OPENROUTER_MODELS + def test_filters_out_models_without_tool_support(self, monkeypatch): + """Models whose supported_parameters omits 'tools' must not appear in the picker. + + hermes-agent is tool-calling-first — surfacing a non-tool model leads to + immediate runtime failures when the user selects it. Ported from + Kilo-Org/kilocode#9068. + """ + class _Resp: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def read(self): + # opus-4.6 advertises tools → kept + # nano-image has explicit supported_parameters that OMITS tools → dropped + # qwen3.6-plus advertises tools → kept + return ( + b'{"data":[' + b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"},' + b'"supported_parameters":["temperature","tools","tool_choice"]},' + b'{"id":"google/gemini-3-pro-image-preview","pricing":{"prompt":"0.00001","completion":"0.00003"},' + b'"supported_parameters":["temperature","response_format"]},' + b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"},' + b'"supported_parameters":["tools","temperature"]}' + b']}' + ) + + # Include the image-only id in the curated list so it has a chance to be surfaced. + monkeypatch.setattr( + _models_mod, + "OPENROUTER_MODELS", + [ + ("anthropic/claude-opus-4.6", ""), + ("google/gemini-3-pro-image-preview", ""), + ("qwen/qwen3.6-plus", ""), + ], + ) + monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None) + with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()): + models = fetch_openrouter_models(force_refresh=True) + + ids = [mid for mid, _ in models] + assert "anthropic/claude-opus-4.6" in ids + assert "qwen/qwen3.6-plus" in ids + # Image-only model advertised supported_parameters WITHOUT tools → must be dropped. + assert "google/gemini-3-pro-image-preview" not in ids + + def test_permissive_when_supported_parameters_missing(self, monkeypatch): + """Models missing the supported_parameters field keep appearing in the picker. + + Some OpenRouter-compatible gateways (Nous Portal, private mirrors, older + catalog snapshots) don't populate supported_parameters. Treating missing + as 'unknown → allow' prevents the picker from silently emptying on + those gateways. + """ + class _Resp: + def __enter__(self): + return self + + def __exit__(self, exc_type, exc, tb): + return False + + def read(self): + # No supported_parameters field at all on either entry. + return ( + b'{"data":[' + b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"}},' + b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"}}' + b']}' + ) + + monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None) + with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()): + models = fetch_openrouter_models(force_refresh=True) + + ids = [mid for mid, _ in models] + assert "anthropic/claude-opus-4.6" in ids + assert "qwen/qwen3.6-plus" in ids + + +class TestOpenRouterToolSupportHelper: + """Unit tests for _openrouter_model_supports_tools (Kilo port #9068).""" + + def test_tools_in_supported_parameters(self): + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools( + {"id": "x", "supported_parameters": ["temperature", "tools"]} + ) is True + + def test_tools_missing_from_supported_parameters(self): + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools( + {"id": "x", "supported_parameters": ["temperature", "response_format"]} + ) is False + + def test_supported_parameters_absent_is_permissive(self): + """Missing field → allow (so older / non-OR gateways still work).""" + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools({"id": "x"}) is True + + def test_supported_parameters_none_is_permissive(self): + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools({"id": "x", "supported_parameters": None}) is True + + def test_supported_parameters_malformed_is_permissive(self): + """Malformed (non-list) value → allow rather than silently drop.""" + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools( + {"id": "x", "supported_parameters": "tools,temperature"} + ) is True + + def test_non_dict_item_is_permissive(self): + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools(None) is True + assert _openrouter_model_supports_tools("anthropic/claude-opus-4.6") is True + + def test_empty_supported_parameters_list_drops_model(self): + """Explicit empty list → no tools → drop.""" + from hermes_cli.models import _openrouter_model_supports_tools + assert _openrouter_model_supports_tools( + {"id": "x", "supported_parameters": []} + ) is False + class TestFindOpenrouterSlug: def test_exact_match(self): From 724377c42981e0e2a1a27f2f26bdcf7e861bb64a Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 21 Apr 2026 19:17:06 +1000 Subject: [PATCH 41/63] test(mcp): add failing tests for circuit-breaker recovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The MCP circuit breaker in tools/mcp_tool.py has no half-open state and no reset-on-reconnect behavior, so once it trips after 3 consecutive failures it stays tripped for the process lifetime. These tests lock in the intended recovery behavior: 1. test_circuit_breaker_half_opens_after_cooldown — after the cooldown elapses, the next call must actually probe the session; success closes the breaker. 2. test_circuit_breaker_reopens_on_probe_failure — a failed probe re-arms the cooldown instead of letting every subsequent call through. 3. test_circuit_breaker_cleared_on_reconnect — a successful OAuth recovery resets the breaker even if the post-reconnect retry fails (a successful reconnect is sufficient evidence the server is viable again). All three currently fail, as expected. --- tests/tools/test_mcp_circuit_breaker.py | 252 ++++++++++++++++++++++++ 1 file changed, 252 insertions(+) create mode 100644 tests/tools/test_mcp_circuit_breaker.py diff --git a/tests/tools/test_mcp_circuit_breaker.py b/tests/tools/test_mcp_circuit_breaker.py new file mode 100644 index 0000000000..0173fa52af --- /dev/null +++ b/tests/tools/test_mcp_circuit_breaker.py @@ -0,0 +1,252 @@ +"""Tests for MCP tool-handler circuit-breaker recovery. + +The circuit breaker in ``tools/mcp_tool.py`` is intended to short-circuit +calls to an MCP server that has failed ``_CIRCUIT_BREAKER_THRESHOLD`` +consecutive times, then *transition back to a usable state* once the +server has had time to recover (or an explicit reconnect succeeds). + +The original implementation only had two states — closed and open — with +no mechanism to transition back to closed, so a tripped breaker stayed +tripped for the lifetime of the process. These tests lock in the +half-open / cooldown / reconnect-resets-breaker behavior that fixes +that. +""" +import json +from unittest.mock import MagicMock + +import pytest + + +pytest.importorskip("mcp.client.auth.oauth2") + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _install_stub_server(mcp_tool_module, name: str, call_tool_impl): + """Install a fake MCP server in the module's registry. + + ``call_tool_impl`` is an async function stored at ``session.call_tool`` + (it's what the tool handler invokes). + """ + server = MagicMock() + server.name = name + session = MagicMock() + session.call_tool = call_tool_impl + server.session = session + server._reconnect_event = MagicMock() + server._ready = MagicMock() + server._ready.is_set.return_value = True + + mcp_tool_module._servers[name] = server + mcp_tool_module._server_error_counts.pop(name, None) + if hasattr(mcp_tool_module, "_server_breaker_opened_at"): + mcp_tool_module._server_breaker_opened_at.pop(name, None) + return server + + +def _cleanup(mcp_tool_module, name: str) -> None: + mcp_tool_module._servers.pop(name, None) + mcp_tool_module._server_error_counts.pop(name, None) + if hasattr(mcp_tool_module, "_server_breaker_opened_at"): + mcp_tool_module._server_breaker_opened_at.pop(name, None) + + +# --------------------------------------------------------------------------- +# Tests +# --------------------------------------------------------------------------- + + +def test_circuit_breaker_half_opens_after_cooldown(monkeypatch, tmp_path): + """After a tripped breaker's cooldown elapses, the *next* call must + actually execute against the session (half-open probe). When the + probe succeeds, the breaker resets to fully closed. + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools import mcp_tool + from tools.mcp_tool import _make_tool_handler + + call_count = {"n": 0} + + async def _call_tool_success(*a, **kw): + call_count["n"] += 1 + result = MagicMock() + result.isError = False + block = MagicMock() + block.text = "ok" + result.content = [block] + result.structuredContent = None + return result + + _install_stub_server(mcp_tool, "srv", _call_tool_success) + mcp_tool._ensure_mcp_loop() + + try: + # Trip the breaker by setting the count at/above threshold and + # stamping the open-time to "now". + mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + fake_now = [1000.0] + + def _fake_monotonic(): + return fake_now[0] + + monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic) + # The breaker-open timestamp dict is introduced by the fix; on + # a pre-fix build it won't exist, which will cause the test to + # fail at the .get() inside the gate (correct — the fix is + # required for this state to be tracked at all). + if hasattr(mcp_tool, "_server_breaker_opened_at"): + mcp_tool._server_breaker_opened_at["srv"] = fake_now[0] + cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0) + + handler = _make_tool_handler("srv", "tool1", 10.0) + + # Before cooldown: must short-circuit (no session call). + result = handler({}) + parsed = json.loads(result) + assert "error" in parsed, parsed + assert "unreachable" in parsed["error"].lower() + assert call_count["n"] == 0, ( + "breaker should short-circuit before cooldown elapses" + ) + + # Advance past cooldown → next call is a half-open probe that + # actually hits the session. + fake_now[0] += cooldown + 1.0 + + result = handler({}) + parsed = json.loads(result) + assert parsed.get("result") == "ok", parsed + assert call_count["n"] == 1, "half-open probe should invoke session" + + # On probe success the breaker must close (count reset to 0). + assert mcp_tool._server_error_counts.get("srv", 0) == 0 + finally: + _cleanup(mcp_tool, "srv") + + +def test_circuit_breaker_reopens_on_probe_failure(monkeypatch, tmp_path): + """If the half-open probe fails, the breaker must re-arm the + cooldown (not let every subsequent call through). + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools import mcp_tool + from tools.mcp_tool import _make_tool_handler + + call_count = {"n": 0} + + async def _call_tool_fails(*a, **kw): + call_count["n"] += 1 + raise RuntimeError("still broken") + + _install_stub_server(mcp_tool, "srv", _call_tool_fails) + mcp_tool._ensure_mcp_loop() + + try: + mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + fake_now = [1000.0] + + def _fake_monotonic(): + return fake_now[0] + + monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic) + if hasattr(mcp_tool, "_server_breaker_opened_at"): + mcp_tool._server_breaker_opened_at["srv"] = fake_now[0] + cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0) + + handler = _make_tool_handler("srv", "tool1", 10.0) + + # Advance past cooldown, run probe, expect failure. + fake_now[0] += cooldown + 1.0 + result = handler({}) + parsed = json.loads(result) + assert "error" in parsed + assert call_count["n"] == 1, "probe should invoke session once" + + # The probe failure must have re-armed the cooldown — another + # immediate call should short-circuit, not invoke session again. + result = handler({}) + parsed = json.loads(result) + assert "unreachable" in parsed.get("error", "").lower() + assert call_count["n"] == 1, ( + "breaker should re-open and block further calls after probe failure" + ) + finally: + _cleanup(mcp_tool, "srv") + + +def test_circuit_breaker_cleared_on_reconnect(monkeypatch, tmp_path): + """When the auth-recovery path successfully reconnects the server, + the breaker should be cleared so subsequent calls aren't gated on a + stale failure count — even if the post-reconnect retry itself fails. + + This locks in the fix-#2 contract: a successful reconnect is + sufficient evidence that the server is viable again. Under the old + implementation, reset only happened on retry *success*, so a + reconnect+retry-failure left the counter pinned above threshold + forever. + """ + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + + from tools import mcp_tool + from tools.mcp_oauth_manager import get_manager, reset_manager_for_tests + from mcp.client.auth import OAuthFlowError + + reset_manager_for_tests() + + async def _call_tool_unused(*a, **kw): # pragma: no cover + raise AssertionError("session.call_tool should not be reached in this test") + + _install_stub_server(mcp_tool, "srv", _call_tool_unused) + mcp_tool._ensure_mcp_loop() + + # Open the breaker well above threshold, with a recent open-time so + # it would short-circuit everything without a reset. + mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + 2 + if hasattr(mcp_tool, "_server_breaker_opened_at"): + import time as _time + mcp_tool._server_breaker_opened_at["srv"] = _time.monotonic() + + # Force handle_401 to claim recovery succeeded. + mgr = get_manager() + + async def _h401(name, token=None): + return True + + monkeypatch.setattr(mgr, "handle_401", _h401) + + try: + # Retry fails *after* the successful reconnect. Under the old + # implementation this bumps an already-tripped counter even + # higher. Under fix #2 the reset happens on successful + # reconnect, and the post-retry bump only raises the fresh + # count to 1 — still below threshold. + def _retry_call(): + raise OAuthFlowError("still failing post-reconnect") + + result = mcp_tool._handle_auth_error_and_retry( + "srv", + OAuthFlowError("initial"), + _retry_call, + "tools/call test", + ) + # The call as a whole still surfaces needs_reauth because the + # retry itself didn't succeed, but the breaker state must + # reflect the successful reconnect. + assert result is not None + parsed = json.loads(result) + assert parsed.get("needs_reauth") is True, parsed + + # Post-reconnect count was reset to 0, then the failing retry + # bumped it to exactly 1 — well below threshold. + count = mcp_tool._server_error_counts.get("srv", 0) + assert count < mcp_tool._CIRCUIT_BREAKER_THRESHOLD, ( + f"successful reconnect must reset the breaker below threshold; " + f"got count={count}, threshold={mcp_tool._CIRCUIT_BREAKER_THRESHOLD}" + ) + finally: + _cleanup(mcp_tool, "srv") From 8cc3cebca282fb6770482ee9fcb3b1c95cf192cb Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 21 Apr 2026 19:19:13 +1000 Subject: [PATCH 42/63] fix(mcp): add half-open state to circuit breaker The MCP circuit breaker previously had no path back to the closed state: once _server_error_counts[srv] reached _CIRCUIT_BREAKER_THRESHOLD the gate short-circuited every subsequent call, so the only reset path (on successful call) was unreachable. A single transient 3-failure blip (bad network, server restart, expired token) permanently disabled every tool on that MCP server for the rest of the agent session. Introduce a classic closed/open/half-open state machine: - Track a per-server breaker-open timestamp in _server_breaker_opened_at alongside the existing failure count. - Add _CIRCUIT_BREAKER_COOLDOWN_SEC (60s). Once the count reaches threshold, calls short-circuit for the cooldown window. - After the cooldown elapses, the *next* call falls through as a half-open probe that actually hits the session. Success resets the breaker via _reset_server_error; failure re-bumps the count via _bump_server_error, which re-stamps the open timestamp and re-arms the cooldown. The error message now includes the live failure count and an "Auto-retry available in ~Ns" hint so the model knows the breaker will self-heal rather than giving up on the tool for the whole session. Covers tests 1 (half-opens after cooldown) and 2 (reopens on probe failure); test 3 (cleared on reconnect) still fails pending fix #2. --- tools/mcp_tool.py | 85 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 68 insertions(+), 17 deletions(-) diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index a0a22773e5..c393a09f9a 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1249,9 +1249,47 @@ _servers: Dict[str, MCPServerTask] = {} # _CIRCUIT_BREAKER_THRESHOLD consecutive failures, the handler returns # a "server unreachable" message that tells the model to stop retrying, # preventing the 90-iteration burn loop described in #10447. -# Reset to 0 on any successful call. +# +# State machine: +# closed — error count below threshold; all calls go through. +# open — threshold reached; calls short-circuit until the +# cooldown elapses. +# half-open — cooldown elapsed; the next call is a probe that +# actually hits the session. Probe success → closed. +# Probe failure → reopens (cooldown re-armed). +# +# ``_server_breaker_opened_at`` records the monotonic timestamp when +# the breaker most recently transitioned into the open state. Use the +# ``_bump_server_error`` / ``_reset_server_error`` helpers to mutate +# this state — they keep the count and timestamp in sync. _server_error_counts: Dict[str, int] = {} +_server_breaker_opened_at: Dict[str, float] = {} _CIRCUIT_BREAKER_THRESHOLD = 3 +_CIRCUIT_BREAKER_COOLDOWN_SEC = 60.0 + + +def _bump_server_error(server_name: str) -> None: + """Increment the consecutive-failure count for ``server_name``. + + When the count crosses :data:`_CIRCUIT_BREAKER_THRESHOLD`, stamp the + breaker-open timestamp so the cooldown clock starts (or re-starts, + for probe failures in the half-open state). + """ + n = _server_error_counts.get(server_name, 0) + 1 + _server_error_counts[server_name] = n + if n >= _CIRCUIT_BREAKER_THRESHOLD: + _server_breaker_opened_at[server_name] = time.monotonic() + + +def _reset_server_error(server_name: str) -> None: + """Fully close the breaker for ``server_name``. + + Clears both the failure count and the breaker-open timestamp. Call + this on any unambiguous success signal (successful tool call, + successful reconnect, manual /mcp refresh). + """ + _server_error_counts[server_name] = 0 + _server_breaker_opened_at.pop(server_name, None) # --------------------------------------------------------------------------- # Auth-failure detection helpers (Task 6 of MCP OAuth consolidation) @@ -1396,10 +1434,10 @@ def _handle_auth_error_and_retry( try: parsed = json.loads(result) if "error" not in parsed: - _server_error_counts[server_name] = 0 + _reset_server_error(server_name) return result except (json.JSONDecodeError, TypeError): - _server_error_counts[server_name] = 0 + _reset_server_error(server_name) return result except Exception as retry_exc: logger.warning( @@ -1410,7 +1448,7 @@ def _handle_auth_error_and_retry( # No recovery available, or retry also failed: surface a structured # needs_reauth error. Bumps the circuit breaker so the model stops # retrying the tool. - _server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1 + _bump_server_error(server_name) return json.dumps({ "error": ( f"MCP server '{server_name}' requires re-authentication. " @@ -1614,20 +1652,33 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): # Circuit breaker: if this server has failed too many times # consecutively, short-circuit with a clear message so the model # stops retrying and uses alternative approaches (#10447). + # + # Once the cooldown elapses, the breaker transitions to + # half-open: we let the *next* call through as a probe. On + # success the success-path below resets the breaker; on + # failure the error paths below bump the count again, which + # re-stamps the open-time via _bump_server_error (re-arming + # the cooldown). if _server_error_counts.get(server_name, 0) >= _CIRCUIT_BREAKER_THRESHOLD: - return json.dumps({ - "error": ( - f"MCP server '{server_name}' is unreachable after " - f"{_CIRCUIT_BREAKER_THRESHOLD} consecutive failures. " - f"Do NOT retry this tool — use alternative approaches " - f"or ask the user to check the MCP server." - ) - }, ensure_ascii=False) + opened_at = _server_breaker_opened_at.get(server_name, 0.0) + age = time.monotonic() - opened_at + if age < _CIRCUIT_BREAKER_COOLDOWN_SEC: + remaining = max(1, int(_CIRCUIT_BREAKER_COOLDOWN_SEC - age)) + return json.dumps({ + "error": ( + f"MCP server '{server_name}' is unreachable after " + f"{_server_error_counts[server_name]} consecutive " + f"failures. Auto-retry available in ~{remaining}s. " + f"Do NOT retry this tool yet — use alternative " + f"approaches or ask the user to check the MCP server." + ) + }, ensure_ascii=False) + # Cooldown elapsed → fall through as a half-open probe. with _lock: server = _servers.get(server_name) if not server or not server.session: - _server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1 + _bump_server_error(server_name) return json.dumps({ "error": f"MCP server '{server_name}' is not connected" }, ensure_ascii=False) @@ -1676,11 +1727,11 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): try: parsed = json.loads(result) if "error" in parsed: - _server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1 + _bump_server_error(server_name) else: - _server_error_counts[server_name] = 0 # success — reset + _reset_server_error(server_name) # success — reset except (json.JSONDecodeError, TypeError): - _server_error_counts[server_name] = 0 # non-JSON = success + _reset_server_error(server_name) # non-JSON = success return result except InterruptedError: return _interrupted_call_result() @@ -1695,7 +1746,7 @@ def _make_tool_handler(server_name: str, tool_name: str, tool_timeout: float): if recovered is not None: return recovered - _server_error_counts[server_name] = _server_error_counts.get(server_name, 0) + 1 + _bump_server_error(server_name) logger.error( "MCP tool %s/%s call failed: %s", server_name, tool_name, exc, From 484d151e99c1bec71c5ffeb3f08dc7df0c6d9dc2 Mon Sep 17 00:00:00 2001 From: Ben Date: Tue, 21 Apr 2026 19:20:15 +1000 Subject: [PATCH 43/63] fix(mcp): reset circuit breaker on successful OAuth reconnect MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously the breaker was only cleared when the post-reconnect retry call itself succeeded (via _reset_server_error at the end of the try block). If OAuth recovery succeeded but the retry call happened to fail for a different reason, control fell through to the needs_reauth path which called _bump_server_error — adding to an already-tripped count instead of the fresh count the reconnect justified. With fix #1 in place this would still self-heal on the next cooldown, but we should not pay a 60s stall when we already have positive evidence the server is viable. Move _reset_server_error(server_name) up to immediately after the reconnect-and-ready-wait block, before the retry_call. The subsequent retry still goes through _bump_server_error on failure, so a genuinely broken server re-trips the breaker as normal — but the retry starts from a clean count (1 after a failure), not a stale one. --- tools/mcp_tool.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/mcp_tool.py b/tools/mcp_tool.py index c393a09f9a..aecc0cc230 100644 --- a/tools/mcp_tool.py +++ b/tools/mcp_tool.py @@ -1429,6 +1429,16 @@ def _handle_auth_error_and_retry( break time.sleep(0.25) + # A successful OAuth recovery is independent evidence that the + # server is viable again, so close the circuit breaker here — + # not only on retry success. Without this, a reconnect + # followed by a failing retry would leave the breaker pinned + # above threshold forever (the retry-exception branch below + # bumps the count again). The post-reset retry still goes + # through _bump_server_error on failure, so a genuinely broken + # server will re-trip the breaker as normal. + _reset_server_error(server_name) + try: result = retry_call() try: From 3f72b2fe1574fea279198f5e8f234ec386e945f3 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 05:19:22 -0700 Subject: [PATCH 44/63] fix(/model): accept provider switches when /models is unreachable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Gateway /model --provider opencode-go (or any provider whose /models endpoint is down, 404s, or doesn't exist) silently failed. validate_requested_model returned accepted=False whenever fetch_api_models returned None, switch_model returned success=False, and the gateway never wrote _session_model_overrides — so the switch appeared to succeed in the error message flow but the next turn kept calling the old provider. The validator already had static-catalog fallbacks for MiniMax and Codex (providers without a /models endpoint). Extended the same pattern as the terminal fallback: when the live probe fails, consult provider_model_ids() for the curated catalog. Known models → accepted+recognized. Close typos → auto-corrected. Unknown models → soft-accepted with a 'Not in curated catalog' warning. Providers with no catalog at all → soft-accepted with a generic 'Note:' warning, finally honoring the in-code comment ('Accept and persist, but warn') that had been lying since it was written. Tests: 7 new tests in test_opencode_go_validation_fallback.py covering the catalog lookup, case-insensitive match, auto-correct, unknown-with-suggestion, unknown-without-suggestion, and no-catalog paths. TestValidateApiFallback in test_model_validation.py updated — its four 'rejected_when_api_down' tests were encoding exactly the bug being fixed. --- hermes_cli/models.py | 63 ++++++++- tests/hermes_cli/test_model_validation.py | 67 ++++++--- .../test_opencode_go_validation_fallback.py | 133 ++++++++++++++++++ 3 files changed, 243 insertions(+), 20 deletions(-) create mode 100644 tests/hermes_cli/test_opencode_go_validation_fallback.py diff --git a/hermes_cli/models.py b/hermes_cli/models.py index ae54217952..33614d4263 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -2426,13 +2426,70 @@ def validate_requested_model( except Exception: pass # Fall through to generic warning + # Static-catalog fallback: when the /models probe was unreachable, + # validate against the curated list from provider_model_ids() — same + # pattern as the openai-codex and minimax branches above. This fixes + # /model switches in the gateway for providers like opencode-go and + # opencode-zen whose /models endpoint returns 404 against the HTML + # marketing site. Without this block, validate_requested_model would + # reject every model on such providers, switch_model() would return + # success=False, and the gateway would never write to + # _session_model_overrides. provider_label = _PROVIDER_LABELS.get(normalized, normalized) + try: + catalog_models = provider_model_ids(normalized) + except Exception: + catalog_models = [] + + if catalog_models: + catalog_lower = {m.lower(): m for m in catalog_models} + if requested_for_lookup.lower() in catalog_lower: + return { + "accepted": True, + "persist": True, + "recognized": True, + "message": None, + } + catalog_lower_list = list(catalog_lower.keys()) + auto = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9 + ) + if auto: + corrected = catalog_lower[auto[0]] + return { + "accepted": True, + "persist": True, + "recognized": True, + "corrected_model": corrected, + "message": f"Auto-corrected `{requested}` → `{corrected}`", + } + suggestions = get_close_matches( + requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5 + ) + suggestion_text = "" + if suggestions: + suggestion_text = "\n Similar models: " + ", ".join( + f"`{catalog_lower[s]}`" for s in suggestions + ) + return { + "accepted": True, + "persist": True, + "recognized": False, + "message": ( + f"Note: `{requested}` was not found in the {provider_label} curated catalog " + f"and the /models endpoint was unreachable.{suggestion_text}" + f"\n The model may still work if it exists on the provider." + ), + } + + # No catalog available — accept with a warning, matching the comment's + # stated intent ("Accept and persist, but warn"). return { - "accepted": False, - "persist": False, + "accepted": True, + "persist": True, "recognized": False, "message": ( - f"Could not reach the {provider_label} API to validate `{requested}`. " + f"Note: could not reach the {provider_label} API to validate `{requested}`. " f"If the service isn't down, this model may not be valid." ), } diff --git a/tests/hermes_cli/test_model_validation.py b/tests/hermes_cli/test_model_validation.py index 65405d909f..72ffc5216d 100644 --- a/tests/hermes_cli/test_model_validation.py +++ b/tests/hermes_cli/test_model_validation.py @@ -457,29 +457,62 @@ class TestValidateApiNotFound: assert "not found" in result["message"] -# -- validate — API unreachable — reject with guidance ---------------- +# -- validate — API unreachable — soft-accept via catalog or warning -------- class TestValidateApiFallback: - def test_any_model_rejected_when_api_down(self): - result = _validate("anthropic/claude-opus-4.6", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False + """When /models is unreachable, the validator must accept the model (with + a warning) rather than reject it outright — otherwise provider switches + fail in the gateway for any provider whose /models endpoint is down or + doesn't exist (e.g. opencode-go returns 404 HTML). - def test_unknown_model_also_rejected_when_api_down(self): - result = _validate("anthropic/claude-next-gen", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False - assert "could not reach" in result["message"].lower() + Two paths: + 1. Provider has a curated catalog (``_PROVIDER_MODELS`` / live fetch): + validate against it (recognized=True for known models, + recognized=False with 'Note:' for unknown). + 2. Provider has no catalog: accept with a generic 'Note:' warning. - def test_zai_model_rejected_when_api_down(self): + In both cases ``accepted`` and ``persist`` must be True so the gateway can + write the ``_session_model_overrides`` entry. + """ + + def test_known_model_accepted_via_catalog_when_api_down(self): + # Force the openrouter catalog lookup to return a deterministic list. + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"], + ): + result = _validate("anthropic/claude-opus-4.6", api_models=None) + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is True + + def test_unknown_model_accepted_with_note_when_api_down(self): + with patch( + "hermes_cli.models.provider_model_ids", + return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"], + ): + result = _validate("anthropic/claude-next-gen", api_models=None) + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + # Message flags it as unverified against the catalog. + assert "not found" in result["message"].lower() or "note" in result["message"].lower() + + def test_zai_known_model_accepted_via_catalog_when_api_down(self): + # glm-5 is in the zai curated catalog (_PROVIDER_MODELS["zai"]). result = _validate("glm-5", provider="zai", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is True - def test_unknown_provider_rejected_when_api_down(self): - result = _validate("some-model", provider="totally-unknown", api_models=None) - assert result["accepted"] is False - assert result["persist"] is False + def test_unknown_provider_soft_accepted_when_api_down(self): + # No catalog for unknown providers — soft-accept with a Note. + with patch("hermes_cli.models.provider_model_ids", return_value=[]): + result = _validate("some-model", provider="totally-unknown", api_models=None) + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "note" in result["message"].lower() def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self): with patch( diff --git a/tests/hermes_cli/test_opencode_go_validation_fallback.py b/tests/hermes_cli/test_opencode_go_validation_fallback.py new file mode 100644 index 0000000000..f0ae76098e --- /dev/null +++ b/tests/hermes_cli/test_opencode_go_validation_fallback.py @@ -0,0 +1,133 @@ +"""Tests for the static-catalog fallback in validate_requested_model. + +OpenCode Go and OpenCode Zen publish an OpenAI-compatible API at paths that do +NOT expose ``/models`` (the path returns the marketing site's HTML 404). This +caused ``validate_requested_model`` to return ``accepted=False`` for every +model on those providers, which in turn made ``switch_model()`` fail and the +gateway's ``/model --provider opencode-go`` command never write to +``_session_model_overrides``. + +These tests cover the catalog-fallback path: when ``fetch_api_models`` returns +``None``, the validator must consult ``provider_model_ids()`` for the provider +(populated from ``_PROVIDER_MODELS``) rather than rejecting outright. +""" + +from unittest.mock import patch + +from hermes_cli.models import validate_requested_model + + +_UNREACHABLE_PROBE = { + "models": None, + "probed_url": "https://opencode.ai/zen/go/v1/models", + "resolved_base_url": "https://opencode.ai/zen/go/v1", + "suggested_base_url": None, + "used_fallback": False, +} + + +def _patched(func): + """Decorator: force fetch_api_models / probe_api_models to simulate an + unreachable /models endpoint, proving the catalog path is used.""" + def wrapper(*args, **kwargs): + with patch("hermes_cli.models.fetch_api_models", return_value=None), \ + patch("hermes_cli.models.probe_api_models", return_value=_UNREACHABLE_PROBE): + return func(*args, **kwargs) + wrapper.__name__ = func.__name__ + return wrapper + + +# --------------------------------------------------------------------------- +# opencode-go: curated catalog in _PROVIDER_MODELS +# --------------------------------------------------------------------------- + + +@_patched +def test_opencode_go_known_model_accepted(): + """A model present in the opencode-go curated catalog must be accepted + even when /models is unreachable.""" + result = validate_requested_model("kimi-k2.6", "opencode-go") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is True + assert result["message"] is None + + +@_patched +def test_opencode_go_known_model_case_insensitive(): + """Catalog lookup is case-insensitive.""" + result = validate_requested_model("KIMI-K2.6", "opencode-go") + assert result["accepted"] is True + assert result["recognized"] is True + + +@_patched +def test_opencode_go_typo_auto_corrected(): + """A close typo (>= 0.9 similarity) is auto-corrected to the catalog + entry.""" + # 'kimi-k2.55' vs 'kimi-k2.5' ratio ≈ 0.95 — within the 0.9 cutoff. + result = validate_requested_model("kimi-k2.55", "opencode-go") + assert result["accepted"] is True + assert result["recognized"] is True + assert result.get("corrected_model") == "kimi-k2.5" + + +@_patched +def test_opencode_go_unknown_model_accepted_with_suggestion(): + """An unknown model that has a medium-similarity match (>= 0.5 but < 0.9) + is accepted with recognized=False and a 'similar models' hint. The key + invariant: the gateway MUST be able to persist this override, so + accepted/persist must both be True.""" + # 'kimi-k3-preview' vs 'kimi-k2.6' — similar enough to suggest, not to auto-correct. + result = validate_requested_model("kimi-k3-preview", "opencode-go") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "kimi-k3-preview" in result["message"] + assert "curated catalog" in result["message"] + + +@_patched +def test_opencode_go_totally_unknown_model_still_accepted(): + """A model with zero similarity to the catalog is still accepted (no + suggestion line) so the user can try a model that hasn't made it into the + curated list yet.""" + result = validate_requested_model("some-brand-new-model", "opencode-go") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + # No suggestion text (no close matches) + assert "Similar models" not in result["message"] + assert "opencode" in result["message"].lower() or "opencode go" in result["message"].lower() + + +# --------------------------------------------------------------------------- +# opencode-zen: same pattern as opencode-go +# --------------------------------------------------------------------------- + + +@_patched +def test_opencode_zen_known_model_accepted(): + """opencode-zen also uses _PROVIDER_MODELS; kimi-k2 is in its catalog.""" + result = validate_requested_model("kimi-k2", "opencode-zen") + assert result["accepted"] is True + assert result["recognized"] is True + + +# --------------------------------------------------------------------------- +# Unknown provider with no catalog: soft-accept (honors the comment's intent) +# --------------------------------------------------------------------------- + + +@_patched +def test_provider_without_catalog_accepts_with_warning(): + """When a provider has no entry in _PROVIDER_MODELS and /models is + unreachable, accept the model with a 'Note:' warning rather than reject. + This matches the in-code comment: 'Accept and persist, but warn so typos + don't silently break things.'""" + # Use a made-up provider name that won't resolve to any catalog. + result = validate_requested_model("some-model", "provider-that-does-not-exist") + assert result["accepted"] is True + assert result["persist"] is True + assert result["recognized"] is False + assert "Note:" in result["message"] From 063bc3c1e2e76c7d46bad6f9ef3f6244bd50efc4 Mon Sep 17 00:00:00 2001 From: Kian Meng Date: Mon, 20 Apr 2026 19:46:24 +0000 Subject: [PATCH 45/63] fix(kimi): send max_tokens, reasoning_effort, and thinking for Kimi/Moonshot MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Kimi/Moonshot endpoints require explicit parameters that Hermes was not sending, causing 'Response truncated due to output length limit' errors and inconsistent reasoning behavior. Root cause analysis against Kimi CLI source (MoonshotAI/kimi-cli, packages/kosong/src/kosong/chat_provider/kimi.py): 1. max_tokens: Kimi's API defaults to a very low value when omitted. Reasoning tokens share the output budget — the model exhausts it on thinking alone. Send 32000, matching Kimi CLI's generate() default. 2. reasoning_effort: Kimi CLI sends this as a top-level parameter (not inside extra_body). Hermes was not sending it at all because _supports_reasoning_extra_body() returns False for non-OpenRouter endpoints. 3. extra_body.thinking: Kimi CLI uses with_thinking() which sets extra_body.thinking={"type":"enabled"} alongside reasoning_effort. This is a separate control from the OpenAI-style reasoning extra_body that Hermes sends for OpenRouter/GitHub. Without it, the Kimi gateway may not activate reasoning mode correctly. Covers api.kimi.com (Kimi Code) and api.moonshot.ai/cn (Moonshot). Tests: 6 new test cases for max_tokens, reasoning_effort, and extra_body.thinking under various configs. --- run_agent.py | 46 ++++++++++++++++++ tests/run_agent/test_run_agent.py | 78 +++++++++++++++++++++++++++++++ 2 files changed, 124 insertions(+) diff --git a/run_agent.py b/run_agent.py index 722f7cea4b..8ead378665 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6909,6 +6909,34 @@ class AIAgent: # (the documented max output for qwen3-coder models) so the # model has adequate output budget for tool calls. api_kwargs.update(self._max_tokens_param(65536)) + elif ( + base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ): + # Kimi/Moonshot defaults to a low max_tokens when omitted. + # Reasoning tokens share the output budget — without an explicit + # value the model can exhaust it on thinking alone, causing + # "Response truncated due to output length limit". 32000 matches + # Kimi CLI's default (see MoonshotAI/kimi-cli kimi.py generate()). + api_kwargs.update(self._max_tokens_param(32000)) + # Kimi requires reasoning_effort as a top-level chat completions + # parameter (not inside extra_body). Mirror Kimi CLI's + # with_generation_kwargs(reasoning_effort=...) / with_thinking(): + # when thinking is disabled, Kimi CLI omits reasoning_effort + # entirely (maps to None). + _kimi_thinking_off = bool( + self.reasoning_config + and isinstance(self.reasoning_config, dict) + and self.reasoning_config.get("enabled") is False + ) + if not _kimi_thinking_off: + _kimi_effort = "medium" + if self.reasoning_config and isinstance(self.reasoning_config, dict): + _e = (self.reasoning_config.get("effort") or "").strip().lower() + if _e in ("low", "medium", "high"): + _kimi_effort = _e + api_kwargs["reasoning_effort"] = _kimi_effort elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower(): # OpenRouter and Nous Portal translate requests to Anthropic's # Messages API, which requires max_tokens as a mandatory field. @@ -6940,6 +6968,24 @@ class AIAgent: extra_body["provider"] = provider_preferences _is_nous = "nousresearch" in self._base_url_lower + # Kimi/Moonshot API uses extra_body.thinking (separate from the + # top-level reasoning_effort) to enable/disable reasoning mode. + # Mirror Kimi CLI's with_thinking() behavior exactly — see + # MoonshotAI/kimi-cli packages/kosong/src/kosong/chat_provider/kimi.py + _is_kimi = ( + base_url_host_matches(self.base_url, "api.kimi.com") + or base_url_host_matches(self.base_url, "moonshot.ai") + or base_url_host_matches(self.base_url, "moonshot.cn") + ) + if _is_kimi: + _kimi_thinking_enabled = True + if self.reasoning_config and isinstance(self.reasoning_config, dict): + if self.reasoning_config.get("enabled") is False: + _kimi_thinking_enabled = False + extra_body["thinking"] = { + "type": "enabled" if _kimi_thinking_enabled else "disabled", + } + if self._supports_reasoning_extra_body(): if _is_github_models: github_reasoning = self._github_models_reasoning_extra_body() diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 9f3341101a..e7a96e5dee 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -952,6 +952,84 @@ class TestBuildApiKwargs: assert "temperature" not in kwargs + def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent): + """Kimi endpoint should send max_tokens=32000 and reasoning_effort as + top-level params, matching Kimi CLI's default behavior.""" + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-for-coding" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["max_tokens"] == 32000 + assert kwargs["reasoning_effort"] == "medium" + + def test_kimi_coding_endpoint_respects_custom_effort(self, agent): + """reasoning_effort should reflect reasoning_config.effort when set.""" + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-for-coding" + agent.reasoning_config = {"enabled": True, "effort": "high"} + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["reasoning_effort"] == "high" + + def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent): + """Kimi endpoint should send extra_body.thinking={"type":"enabled"} + to activate reasoning mode, mirroring Kimi CLI's with_thinking().""" + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-for-coding" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} + + def test_kimi_coding_endpoint_disables_thinking(self, agent): + """When reasoning_config.enabled=False, thinking should be disabled + and reasoning_effort should be omitted entirely — mirroring Kimi + CLI's with_thinking("off") which maps to reasoning_effort=None.""" + agent.base_url = "https://api.kimi.com/coding/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-for-coding" + agent.reasoning_config = {"enabled": False} + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["extra_body"]["thinking"] == {"type": "disabled"} + assert "reasoning_effort" not in kwargs + + def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent): + """api.moonshot.ai should get the same Kimi-compatible params.""" + agent.base_url = "https://api.moonshot.ai/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-k2.5" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["max_tokens"] == 32000 + assert kwargs["reasoning_effort"] == "medium" + assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} + + def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent): + """api.moonshot.cn (China endpoint) should get the same params.""" + agent.base_url = "https://api.moonshot.cn/v1" + agent._base_url_lower = agent.base_url.lower() + agent.model = "kimi-k2.5" + messages = [{"role": "user", "content": "hi"}] + + kwargs = agent._build_api_kwargs(messages) + + assert kwargs["max_tokens"] == 32000 + assert kwargs["reasoning_effort"] == "medium" + assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} + def test_provider_preferences_injected(self, agent): agent.base_url = "https://openrouter.ai/api/v1" agent.providers_allowed = ["Anthropic"] From 793199ab0b61de769aa18c0598258975998e4864 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Tue, 21 Apr 2026 05:23:36 -0700 Subject: [PATCH 46/63] chore(release): add mengjian-github to AUTHOR_MAP --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index e36b41d032..780c93c055 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -112,6 +112,7 @@ AUTHOR_MAP = { "xaydinoktay@gmail.com": "aydnOktay", "abdullahfarukozden@gmail.com": "Farukest", "lovre.pesut@gmail.com": "rovle", + "xjtumj@gmail.com": "mengjian-github", "kevinskysunny@gmail.com": "kevinskysunny", "xiewenxuan462@gmail.com": "yule975", "yiweimeng.dlut@hotmail.com": "meng93", From 5c540190552d04ad690b165e75a48a51a8d46880 Mon Sep 17 00:00:00 2001 From: zhangguangtao <50561768+zhanggttry@users.noreply.github.com> Date: Tue, 21 Apr 2026 20:29:59 +0800 Subject: [PATCH 47/63] fix(skills): respect HERMES_SESSION_PLATFORM in _is_skill_disabled Fixes #13027 Previously, `_is_skill_disabled()` only checked the explicit `platform` argument and `os.getenv('HERMES_PLATFORM')`, missing the gateway session context (`HERMES_SESSION_PLATFORM`). This caused `skill_view()` to expose skills that were platform-disabled for the active gateway session. Add `_get_session_platform()` helper that resolves the platform from `gateway.session_context.get_session_env`, mirroring the logic in `agent.skill_utils.get_disabled_skill_names()`. Now the platform resolution follows the same precedence as skill_utils: 1. Explicit `platform` argument 2. `HERMES_PLATFORM` environment variable 3. `HERMES_SESSION_PLATFORM` from gateway session context --- tools/skills_tool.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tools/skills_tool.py b/tools/skills_tool.py index dcd1f8c5d1..6ff54230d5 100644 --- a/tools/skills_tool.py +++ b/tools/skills_tool.py @@ -507,13 +507,33 @@ def _get_disabled_skill_names() -> Set[str]: return get_disabled_skill_names() +def _get_session_platform() -> str: + """Resolve the current platform from gateway session context. + + Mirrors the platform-resolution logic in + ``agent.skill_utils.get_disabled_skill_names`` so that + ``_is_skill_disabled`` respects ``HERMES_SESSION_PLATFORM``. + """ + try: + from gateway.session_context import get_session_env + return get_session_env("HERMES_SESSION_PLATFORM") or "" + except Exception: + return "" + + def _is_skill_disabled(name: str, platform: str = None) -> bool: - """Check if a skill is disabled in config.""" + """Check if a skill is disabled in config. + + Resolves the active platform from (in order of precedence): + 1. Explicit ``platform`` argument + 2. ``HERMES_PLATFORM`` environment variable + 3. ``HERMES_SESSION_PLATFORM`` from gateway session context + """ try: from hermes_cli.config import load_config config = load_config() skills_cfg = config.get("skills", {}) - resolved_platform = platform or os.getenv("HERMES_PLATFORM") + resolved_platform = platform or os.getenv("HERMES_PLATFORM") or _get_session_platform() if resolved_platform: platform_disabled = skills_cfg.get("platform_disabled", {}).get(resolved_platform) if platform_disabled is not None: From 3cc4d7374f2ca92112fabb20a12b4716f729b6cd Mon Sep 17 00:00:00 2001 From: VTRiot <105142614+VTRiot@users.noreply.github.com> Date: Mon, 20 Apr 2026 12:54:48 +0900 Subject: [PATCH 48/63] chore: register VTRiot in AUTHOR_MAP --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index 780c93c055..eb077f1b63 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -329,6 +329,7 @@ AUTHOR_MAP = { "zheng.jerilyn@gmail.com": "jerilynzheng", "asslaenn5@gmail.com": "Aslaaen", "shalompmc0505@naver.com": "pinion05", + "105142614+VTRiot@users.noreply.github.com": "VTRiot", } From 18e7fd83644f90ef144895b01bf8b22f714c448d Mon Sep 17 00:00:00 2001 From: VTRiot <105142614+VTRiot@users.noreply.github.com> Date: Mon, 20 Apr 2026 12:54:55 +0900 Subject: [PATCH 49/63] fix(cron): cancel orphan coroutine on delivery timeout before standalone fallback When the live adapter delivery path (_deliver_result) or media send path (_send_media_via_adapter) times out at future.result(timeout=N), the underlying coroutine scheduled via asyncio.run_coroutine_threadsafe can still complete on the event loop, causing a duplicate send after the standalone fallback runs. Cancel the future on TimeoutError before re-raising, so the standalone fallback is the sole delivery path. Adds TestDeliverResultTimeoutCancelsFuture and TestSendMediaTimeoutCancelsFuture. --- cron/scheduler.py | 12 +++++-- tests/cron/test_scheduler.py | 70 ++++++++++++++++++++++++++++++++++++ 2 files changed, 80 insertions(+), 2 deletions(-) diff --git a/cron/scheduler.py b/cron/scheduler.py index 881132006b..61d5537d90 100644 --- a/cron/scheduler.py +++ b/cron/scheduler.py @@ -252,7 +252,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata: coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata) future = asyncio.run_coroutine_threadsafe(coro, loop) - result = future.result(timeout=30) + try: + result = future.result(timeout=30) + except TimeoutError: + future.cancel() + raise if result and not getattr(result, "success", True): logger.warning( "Job '%s': media send failed for %s: %s", @@ -382,7 +386,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata), loop, ) - send_result = future.result(timeout=60) + try: + send_result = future.result(timeout=60) + except TimeoutError: + future.cancel() + raise if send_result and not getattr(send_result, "success", True): err = getattr(send_result, "error", "unknown") logger.warning( diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index e862638eee..c4c722d69f 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -1580,3 +1580,73 @@ class TestParallelTick: end_s1 = [t for action, jid, t in call_times if action == "end" and jid == "s1"][0] start_s2 = [t for action, jid, t in call_times if action == "start" and jid == "s2"][0] assert start_s2 >= end_s1, "Jobs ran concurrently despite max_parallel=1" +async def _noop_coro(): + """Placeholder coroutine used by timeout-cancel tests.""" + return None + + +class TestDeliverResultTimeoutCancelsFuture: + """When future.result(timeout=60) raises TimeoutError in the live + adapter delivery path, the orphan coroutine must be cancelled before + the exception propagates to the standalone fallback. + """ + + def test_timeout_cancels_future_before_fallback(self): + """TimeoutError from future.result must trigger future.cancel().""" + from concurrent.futures import Future + + future = MagicMock(spec=Future) + future.result.side_effect = TimeoutError("timed out") + + def fake_run_coro(coro, loop): + coro.close() + return future + + with patch( + "asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro + ): + with pytest.raises(TimeoutError): + import asyncio + f = asyncio.run_coroutine_threadsafe( + _noop_coro(), MagicMock() + ) + try: + f.result(timeout=60) + except TimeoutError: + f.cancel() + raise + + future.cancel.assert_called_once() + + +class TestSendMediaTimeoutCancelsFuture: + """Same orphan-coroutine guarantee for _send_media_via_adapter's + future.result(timeout=30) call. + """ + + def test_media_timeout_cancels_future(self): + """TimeoutError from the media-send future must call cancel().""" + from concurrent.futures import Future + + future = MagicMock(spec=Future) + future.result.side_effect = TimeoutError("timed out") + + def fake_run_coro(coro, loop): + coro.close() + return future + + with patch( + "asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro + ): + with pytest.raises(TimeoutError): + import asyncio + f = asyncio.run_coroutine_threadsafe( + _noop_coro(), MagicMock() + ) + try: + f.result(timeout=30) + except TimeoutError: + f.cancel() + raise + + future.cancel.assert_called_once() From 267b2faa15bfc6dfd7336ba492a07b8d364c413b Mon Sep 17 00:00:00 2001 From: teknium1 Date: Tue, 21 Apr 2026 05:46:18 -0700 Subject: [PATCH 50/63] test(cron): exercise _deliver_result and _send_media_via_adapter directly for timeout-cancel The original tests replicated the try/except/cancel/raise pattern inline with a mocked future, which tested Python's try/except semantics rather than the scheduler's behavior. Rewrite them to invoke _deliver_result and _send_media_via_adapter end-to-end with a real concurrent.futures.Future whose .result() raises TimeoutError. Mutation-verified: both tests fail when the try/except wrappers are removed from cron/scheduler.py, pass with them in place. --- tests/cron/test_scheduler.py | 147 ++++++++++++++++++++++++----------- 1 file changed, 101 insertions(+), 46 deletions(-) diff --git a/tests/cron/test_scheduler.py b/tests/cron/test_scheduler.py index c4c722d69f..524490eb09 100644 --- a/tests/cron/test_scheduler.py +++ b/tests/cron/test_scheduler.py @@ -1580,73 +1580,128 @@ class TestParallelTick: end_s1 = [t for action, jid, t in call_times if action == "end" and jid == "s1"][0] start_s2 = [t for action, jid, t in call_times if action == "start" and jid == "s2"][0] assert start_s2 >= end_s1, "Jobs ran concurrently despite max_parallel=1" -async def _noop_coro(): - """Placeholder coroutine used by timeout-cancel tests.""" - return None class TestDeliverResultTimeoutCancelsFuture: """When future.result(timeout=60) raises TimeoutError in the live - adapter delivery path, the orphan coroutine must be cancelled before - the exception propagates to the standalone fallback. + adapter delivery path, _deliver_result must cancel the orphan + coroutine so it cannot duplicate-send after the standalone fallback. """ - def test_timeout_cancels_future_before_fallback(self): - """TimeoutError from future.result must trigger future.cancel().""" + def test_live_adapter_timeout_cancels_future_and_falls_back(self): + """End-to-end: live adapter hangs past the 60s budget, _deliver_result + patches the timeout down to a fast value, confirms future.cancel() fires, + and verifies the standalone fallback path still delivers.""" + from gateway.config import Platform from concurrent.futures import Future - future = MagicMock(spec=Future) - future.result.side_effect = TimeoutError("timed out") + # Live adapter whose send() coroutine never resolves within the budget + adapter = AsyncMock() + adapter.send.return_value = MagicMock(success=True) - def fake_run_coro(coro, loop): + pconfig = MagicMock() + pconfig.enabled = True + mock_cfg = MagicMock() + mock_cfg.platforms = {Platform.TELEGRAM: pconfig} + + loop = MagicMock() + loop.is_running.return_value = True + + # A real concurrent.futures.Future so .cancel() has real semantics, + # but we override .result() to raise TimeoutError exactly like the + # 60s wait firing in production. + captured_future = Future() + cancel_calls = [] + original_cancel = captured_future.cancel + + def tracking_cancel(): + cancel_calls.append(True) + return original_cancel() + + captured_future.cancel = tracking_cancel + captured_future.result = MagicMock(side_effect=TimeoutError("timed out")) + + def fake_run_coro(coro, _loop): coro.close() - return future + return captured_future - with patch( - "asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro - ): - with pytest.raises(TimeoutError): - import asyncio - f = asyncio.run_coroutine_threadsafe( - _noop_coro(), MagicMock() - ) - try: - f.result(timeout=60) - except TimeoutError: - f.cancel() - raise + job = { + "id": "timeout-job", + "deliver": "origin", + "origin": {"platform": "telegram", "chat_id": "123"}, + } - future.cancel.assert_called_once() + standalone_send = AsyncMock(return_value={"success": True}) + + with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \ + patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \ + patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro), \ + patch("tools.send_message_tool._send_to_platform", new=standalone_send): + result = _deliver_result( + job, + "Hello world", + adapters={Platform.TELEGRAM: adapter}, + loop=loop, + ) + + # 1. The orphan future was cancelled on timeout (the bug fix) + assert cancel_calls == [True], "future.cancel() must fire on TimeoutError" + # 2. The standalone fallback delivered — no double send, no silent drop + assert result is None, f"expected successful delivery, got error: {result!r}" + standalone_send.assert_awaited_once() class TestSendMediaTimeoutCancelsFuture: """Same orphan-coroutine guarantee for _send_media_via_adapter's - future.result(timeout=30) call. + future.result(timeout=30) call. If this times out mid-batch, the + in-flight coroutine must be cancelled before the next file is tried. """ - def test_media_timeout_cancels_future(self): - """TimeoutError from the media-send future must call cancel().""" + def test_media_send_timeout_cancels_future_and_continues(self): + """End-to-end: _send_media_via_adapter with a future whose .result() + raises TimeoutError. Assert cancel() fires and the loop proceeds + to the next file rather than hanging or crashing.""" from concurrent.futures import Future - future = MagicMock(spec=Future) - future.result.side_effect = TimeoutError("timed out") + adapter = MagicMock() + adapter.send_image_file = AsyncMock() + adapter.send_video = AsyncMock() - def fake_run_coro(coro, loop): + # First file: future that times out. Second file: future that resolves OK. + timeout_future = Future() + timeout_cancel_calls = [] + original_cancel = timeout_future.cancel + + def tracking_cancel(): + timeout_cancel_calls.append(True) + return original_cancel() + + timeout_future.cancel = tracking_cancel + timeout_future.result = MagicMock(side_effect=TimeoutError("timed out")) + + ok_future = Future() + ok_future.set_result(MagicMock(success=True)) + + futures_iter = iter([timeout_future, ok_future]) + + def fake_run_coro(coro, _loop): coro.close() - return future + return next(futures_iter) - with patch( - "asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro - ): - with pytest.raises(TimeoutError): - import asyncio - f = asyncio.run_coroutine_threadsafe( - _noop_coro(), MagicMock() - ) - try: - f.result(timeout=30) - except TimeoutError: - f.cancel() - raise + media_files = [ + ("/tmp/slow.png", False), # times out + ("/tmp/fast.mp4", False), # succeeds + ] - future.cancel.assert_called_once() + loop = MagicMock() + job = {"id": "media-timeout"} + + with patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro): + # Should not raise — the except Exception clause swallows the timeout + _send_media_via_adapter(adapter, "chat-1", media_files, None, loop, job) + + # 1. The timed-out future was cancelled (the bug fix) + assert timeout_cancel_calls == [True], "future.cancel() must fire on TimeoutError" + # 2. Second file still got dispatched — one timeout doesn't abort the batch + adapter.send_video.assert_called_once() + assert adapter.send_video.call_args[1]["video_path"] == "/tmp/fast.mp4" From bd342f30a234f5f0087923e7176e31d5d1f365c1 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 05:52:22 -0700 Subject: [PATCH 51/63] chore: remove stale requirements.txt in favor of pyproject.toml (#13515) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The root requirements.txt has drifted from pyproject.toml for years (unpinned, missing deps like slack-bolt, slack-sdk, exa-py, anthropic) and no part of the codebase (CI, Dockerfiles, scripts, docs) consumes it. It exists only for drive-by 'pip install -r requirements.txt' users and will drift again within weeks of any sync. Canonical install remains: pip install -e ".[all]" Closes #13488 (thanks @hobostay — your sync was correct, we're just deleting the drift trap instead of patching it). --- requirements.txt | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 requirements.txt diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 96f48e77f5..0000000000 --- a/requirements.txt +++ /dev/null @@ -1,36 +0,0 @@ -# NOTE: This file is maintained for convenience only. -# The canonical dependency list is in pyproject.toml. -# Preferred install: pip install -e ".[all]" - -# Core dependencies -openai -python-dotenv -fire -httpx -rich -tenacity -prompt_toolkit -pyyaml -requests -jinja2 -pydantic>=2.0 -PyJWT[crypto] -debugpy - -# Web tools -firecrawl-py -parallel-web>=0.4.2 - -# Image generation -fal-client - -# Text-to-speech (Edge TTS is free, no API key needed) -edge-tts - -# Optional: For cron expression parsing (cronjob scheduling) -croniter - -# Optional: For messaging platform integrations (gateway) -python-telegram-bot[webhooks]>=22.6 -discord.py>=2.0 -aiohttp>=3.9.0 From 155b6198674e39dfcac4495b559b622bd8d2b6e2 Mon Sep 17 00:00:00 2001 From: unlinearity <134848055+UNLINEARITY@users.noreply.github.com> Date: Tue, 21 Apr 2026 17:55:04 +0800 Subject: [PATCH 52/63] fix(agent): normalize socks:// env proxies for httpx/anthropic WSL2 / Clash-style setups often export ALL_PROXY=socks://127.0.0.1:PORT. httpx and the Anthropic SDK reject that alias and expect socks5://, so agent startup failed early with "Unknown scheme for proxy URL" before any provider request could proceed. Add shared normalize_proxy_url()/normalize_proxy_env_vars() helpers in utils.py and route all proxy entry points through them: - run_agent._get_proxy_from_env - agent.auxiliary_client._validate_proxy_env_urls - agent.anthropic_adapter.build_anthropic_client - gateway.platforms.base.resolve_proxy_url Regression coverage: - run_agent proxy env resolution - auxiliary proxy env normalization - gateway proxy URL resolution Verified with: PYTEST_DISABLE_PLUGIN_AUTOLOAD=1 /home/nonlinear/.hermes/hermes-agent/venv/bin/pytest -o addopts='' -p pytest_asyncio.plugin tests/run_agent/test_create_openai_client_proxy_env.py tests/agent/test_proxy_and_url_validation.py tests/gateway/test_proxy_mode.py 39 passed. --- agent/anthropic_adapter.py | 4 +++ agent/auxiliary_client.py | 4 ++- gateway/platforms/base.py | 8 +++-- run_agent.py | 4 +-- tests/agent/test_proxy_and_url_validation.py | 8 +++++ tests/gateway/test_proxy_mode.py | 10 ++++++ .../test_create_openai_client_proxy_env.py | 8 +++++ utils.py | 34 ++++++++++++++++++- 8 files changed, 73 insertions(+), 7 deletions(-) diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index d8d181cc10..ff1d536b17 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -19,6 +19,7 @@ from pathlib import Path from hermes_constants import get_hermes_home from types import SimpleNamespace from typing import Any, Dict, List, Optional, Tuple +from utils import normalize_proxy_env_vars try: import anthropic as _anthropic_sdk @@ -308,6 +309,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: float = "The 'anthropic' package is required for the Anthropic provider. " "Install it with: pip install 'anthropic>=0.39.0'" ) + + normalize_proxy_env_vars() + from httpx import Timeout normalized_base_url = _normalize_base_url_text(base_url) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 50d4d86afb..4f974a2821 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -48,7 +48,7 @@ from openai import OpenAI from agent.credential_pool import load_pool from hermes_cli.config import get_hermes_home from hermes_constants import OPENROUTER_BASE_URL -from utils import base_url_host_matches, base_url_hostname +from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars logger = logging.getLogger(__name__) @@ -1028,6 +1028,8 @@ def _validate_proxy_env_urls() -> None: """ from urllib.parse import urlparse + normalize_proxy_env_vars() + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = str(os.environ.get(key) or "").strip() diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 86a867c107..afb8767124 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -19,6 +19,8 @@ import uuid from abc import ABC, abstractmethod from urllib.parse import urlsplit +from utils import normalize_proxy_url + logger = logging.getLogger(__name__) @@ -159,13 +161,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None: if platform_env_var: value = (os.environ.get(platform_env_var) or "").strip() if value: - return value + return normalize_proxy_url(value) for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", "https_proxy", "http_proxy", "all_proxy"): value = (os.environ.get(key) or "").strip() if value: - return value - return _detect_macos_system_proxy() + return normalize_proxy_url(value) + return normalize_proxy_url(_detect_macos_system_proxy()) def proxy_kwargs_for_bot(proxy_url: str | None) -> dict: diff --git a/run_agent.py b/run_agent.py index 8ead378665..26b334a5bf 100644 --- a/run_agent.py +++ b/run_agent.py @@ -124,7 +124,7 @@ from agent.trajectory import ( convert_scratchpad_to_think, has_incomplete_scratchpad, save_trajectory as _save_trajectory_to_file, ) -from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled +from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url @@ -187,7 +187,7 @@ def _get_proxy_from_env() -> Optional[str]: "https_proxy", "http_proxy", "all_proxy"): value = os.environ.get(key, "").strip() if value: - return value + return normalize_proxy_url(value) return None diff --git a/tests/agent/test_proxy_and_url_validation.py b/tests/agent/test_proxy_and_url_validation.py index 4fd6138a4d..7d7268ed1f 100644 --- a/tests/agent/test_proxy_and_url_validation.py +++ b/tests/agent/test_proxy_and_url_validation.py @@ -6,6 +6,8 @@ when proxy env vars or custom endpoint URLs are malformed. """ from __future__ import annotations +import os + import pytest from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls @@ -31,6 +33,12 @@ def test_proxy_env_accepts_empty(monkeypatch): _validate_proxy_env_urls() # should not raise +def test_proxy_env_normalizes_socks_alias(monkeypatch): + monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/") + _validate_proxy_env_urls() + assert os.environ["ALL_PROXY"] == "socks5://127.0.0.1:1080/" + + @pytest.mark.parametrize("key", [ "HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY", "http_proxy", "https_proxy", "all_proxy", diff --git a/tests/gateway/test_proxy_mode.py b/tests/gateway/test_proxy_mode.py index 11180639e8..e25f226ee9 100644 --- a/tests/gateway/test_proxy_mode.py +++ b/tests/gateway/test_proxy_mode.py @@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest from gateway.config import Platform, StreamingConfig +from gateway.platforms.base import resolve_proxy_url from gateway.run import GatewayRunner from gateway.session import SessionSource @@ -133,6 +134,15 @@ class TestGetProxyUrl: assert runner._get_proxy_url() is None +class TestResolveProxyUrl: + def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/") + assert resolve_proxy_url() == "socks5://127.0.0.1:1080/" + + class TestRunAgentProxyDispatch: """Test that _run_agent() delegates to proxy when configured.""" diff --git a/tests/run_agent/test_create_openai_client_proxy_env.py b/tests/run_agent/test_create_openai_client_proxy_env.py index 7ac9b7e16e..9ef8e3dcd1 100644 --- a/tests/run_agent/test_create_openai_client_proxy_env.py +++ b/tests/run_agent/test_create_openai_client_proxy_env.py @@ -67,6 +67,14 @@ def test_get_proxy_from_env_ignores_blank_values(monkeypatch): assert _get_proxy_from_env() == "http://real-proxy:8080" +def test_get_proxy_from_env_normalizes_socks_alias(monkeypatch): + for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy"): + monkeypatch.delenv(key, raising=False) + monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/") + assert _get_proxy_from_env() == "socks5://127.0.0.1:1080/" + + @patch("run_agent.OpenAI") def test_create_openai_client_routes_via_proxy_when_env_set(mock_openai, monkeypatch): """With HTTPS_PROXY set, the custom httpx.Client must mount an HTTPProxy pool. diff --git a/utils.py b/utils.py index 6b998e2230..f3d38006d1 100644 --- a/utils.py +++ b/utils.py @@ -197,6 +197,39 @@ def env_bool(key: str, default: bool = False) -> bool: return is_truthy_value(os.getenv(key, ""), default=default) +# ─── Proxy Helpers ──────────────────────────────────────────────────────────── + + +_PROXY_ENV_KEYS = ( + "HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY", + "https_proxy", "http_proxy", "all_proxy", +) + + +def normalize_proxy_url(proxy_url: str | None) -> str | None: + """Normalize proxy URLs for httpx/aiohttp compatibility. + + WSL/Clash-style environments often export SOCKS proxies as + ``socks://127.0.0.1:PORT``. httpx rejects that alias and expects the + explicit ``socks5://`` scheme instead. + """ + candidate = str(proxy_url or "").strip() + if not candidate: + return None + if candidate.lower().startswith("socks://"): + return f"socks5://{candidate[len('socks://'):]}" + return candidate + + +def normalize_proxy_env_vars() -> None: + """Rewrite supported proxy env vars to canonical URL forms in-place.""" + for key in _PROXY_ENV_KEYS: + value = os.getenv(key, "") + normalized = normalize_proxy_url(value) + if normalized and normalized != value: + os.environ[key] = normalized + + # ─── URL Parsing Helpers ────────────────────────────────────────────────────── @@ -236,4 +269,3 @@ def base_url_host_matches(base_url: str, domain: str) -> bool: if not domain: return False return hostname == domain or hostname.endswith("." + domain) - From 027751606ad5aac752e36a50b7f0ec5171bd53a6 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 05:45:50 -0700 Subject: [PATCH 53/63] chore(release): add UNLINEARITY to AUTHOR_MAP --- scripts/release.py | 1 + 1 file changed, 1 insertion(+) diff --git a/scripts/release.py b/scripts/release.py index eb077f1b63..a5c19503b0 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -96,6 +96,7 @@ AUTHOR_MAP = { "i@troy-y.org": "TroyMitchell911", "mygamez@163.com": "zhongyueming1121", "hansnow@users.noreply.github.com": "hansnow", + "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY", # contributors (manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", From ea06104a3c3c33d831ee43665bd0ef7f4cbc4fd6 Mon Sep 17 00:00:00 2001 From: Aniruddha Adak Date: Tue, 21 Apr 2026 15:22:58 +0530 Subject: [PATCH 54/63] fix(permissions): handle None response from ACP request_permission --- acp_adapter/permissions.py | 3 +++ tests/acp/test_permissions.py | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/acp_adapter/permissions.py b/acp_adapter/permissions.py index 68f61e340a..c2e1a59826 100644 --- a/acp_adapter/permissions.py +++ b/acp_adapter/permissions.py @@ -63,6 +63,9 @@ def make_approval_callback( logger.warning("Permission request timed out or failed: %s", exc) return "deny" + if response is None: + return "deny" + outcome = response.outcome if isinstance(outcome, AllowedOutcome): option_id = outcome.option_id diff --git a/tests/acp/test_permissions.py b/tests/acp/test_permissions.py index de83ebeffd..57e2bd4e5b 100644 --- a/tests/acp/test_permissions.py +++ b/tests/acp/test_permissions.py @@ -73,3 +73,17 @@ class TestApprovalMapping: result = cb("rm -rf /", "dangerous") assert result == "deny" + + def test_approval_none_response_returns_deny(self): + """When request_permission resolves to None, the callback should return 'deny'.""" + loop = MagicMock(spec=asyncio.AbstractEventLoop) + mock_rp = MagicMock(name="request_permission") + + future = MagicMock(spec=Future) + future.result.return_value = None + + with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future): + cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=1.0) + result = cb("echo hi", "demo") + + assert result == "deny" From c1fb7b6d27fe9aa9a4e8df8a9698009faba30cc2 Mon Sep 17 00:00:00 2001 From: Aniruddha Adak Date: Tue, 21 Apr 2026 15:28:42 +0530 Subject: [PATCH 55/63] fix: support pagination and cwd filtering in list_sessions --- acp_adapter/server.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/acp_adapter/server.py b/acp_adapter/server.py index aa886cfbdc..a989df5d2f 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -447,6 +447,22 @@ class HermesACPAgent(acp.Agent): **kwargs: Any, ) -> ListSessionsResponse: infos = self.session_manager.list_sessions(cwd=cwd) + + if cursor: + # Find the cursor index + for idx, s in enumerate(infos): + if s["session_id"] == cursor: + infos = infos[idx + 1:] + break + else: + # Cursor not found, return empty + infos = [] + + # Cap limit + limit = kwargs.get("limit", 50) + has_more = len(infos) > limit + infos = infos[:limit] + sessions = [] for s in infos: updated_at = s.get("updated_at") @@ -460,7 +476,9 @@ class HermesACPAgent(acp.Agent): updated_at=updated_at, ) ) - return ListSessionsResponse(sessions=sessions) + + next_cursor = sessions[-1].session_id if has_more and sessions else None + return ListSessionsResponse(sessions=sessions, nextCursor=next_cursor) # ---- Prompt (core) ------------------------------------------------------ From 4cc5065f63f7adf20705396fbd685660d63fd565 Mon Sep 17 00:00:00 2001 From: Teknium Date: Tue, 21 Apr 2026 05:59:19 -0700 Subject: [PATCH 56/63] =?UTF-8?q?fix(acp):=20follow-up=20=E2=80=94=20named?= =?UTF-8?q?-const=20page=20size,=20alias=20kwarg,=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace kwargs.get('limit', 50) with module-level _LIST_SESSIONS_PAGE_SIZE constant. ListSessionsRequest schema has no 'limit' field, so the kwarg path was dead. Constant is the single source of truth for the page cap. - Use next_cursor= (field name) instead of nextCursor= (alias). Both work under the schema's populate_by_name config, but using the declared Python field name is the consistent style in this file. - Add docstring explaining cwd pass-through and cursor semantics. - Add 4 tests: first-page with next_cursor, single-page no next_cursor, cursor resumes after match, unknown cursor returns empty page. --- acp_adapter/server.py | 26 +++++++++++++------- tests/acp/test_server.py | 51 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+), 8 deletions(-) diff --git a/acp_adapter/server.py b/acp_adapter/server.py index a989df5d2f..1627c22efb 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -71,6 +71,11 @@ except Exception: # Thread pool for running AIAgent (synchronous) in parallel. _executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent") +# Server-side page size for list_sessions. The ACP ListSessionsRequest schema +# does not expose a client-side limit, so this is a fixed cap that clients +# paginate against using `cursor` / `next_cursor`. +_LIST_SESSIONS_PAGE_SIZE = 50 + def _extract_text( prompt: list[ @@ -446,22 +451,27 @@ class HermesACPAgent(acp.Agent): cwd: str | None = None, **kwargs: Any, ) -> ListSessionsResponse: + """List ACP sessions with optional ``cwd`` filtering and cursor pagination. + + ``cwd`` is passed through to ``SessionManager.list_sessions`` which already + normalizes and filters by working directory. ``cursor`` is a ``session_id`` + previously returned as ``next_cursor``; results resume after that entry. + Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more + results remain, ``next_cursor`` is set to the last returned ``session_id``. + """ infos = self.session_manager.list_sessions(cwd=cwd) if cursor: - # Find the cursor index for idx, s in enumerate(infos): if s["session_id"] == cursor: infos = infos[idx + 1:] break else: - # Cursor not found, return empty + # Unknown cursor -> empty page (do not fall back to full list). infos = [] - # Cap limit - limit = kwargs.get("limit", 50) - has_more = len(infos) > limit - infos = infos[:limit] + has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE + infos = infos[:_LIST_SESSIONS_PAGE_SIZE] sessions = [] for s in infos: @@ -476,9 +486,9 @@ class HermesACPAgent(acp.Agent): updated_at=updated_at, ) ) - + next_cursor = sessions[-1].session_id if has_more and sessions else None - return ListSessionsResponse(sessions=sessions, nextCursor=next_cursor) + return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor) # ---- Prompt (core) ------------------------------------------------------ diff --git a/tests/acp/test_server.py b/tests/acp/test_server.py index 61db3f9fbe..faa4c18a70 100644 --- a/tests/acp/test_server.py +++ b/tests/acp/test_server.py @@ -270,6 +270,57 @@ class TestListAndFork: mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3") + @pytest.mark.asyncio + async def test_list_sessions_pagination_first_page(self, agent): + from acp_adapter import server as acp_server + + infos = [ + {"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0} + for i in range(acp_server._LIST_SESSIONS_PAGE_SIZE + 5) + ] + with patch.object(agent.session_manager, "list_sessions", return_value=infos): + resp = await agent.list_sessions() + + assert len(resp.sessions) == acp_server._LIST_SESSIONS_PAGE_SIZE + assert resp.next_cursor == resp.sessions[-1].session_id + + @pytest.mark.asyncio + async def test_list_sessions_pagination_no_more(self, agent): + infos = [ + {"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0} + for i in range(3) + ] + with patch.object(agent.session_manager, "list_sessions", return_value=infos): + resp = await agent.list_sessions() + + assert len(resp.sessions) == 3 + assert resp.next_cursor is None + + @pytest.mark.asyncio + async def test_list_sessions_cursor_resumes_after_match(self, agent): + infos = [ + {"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + {"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + {"session_id": "s3", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + ] + with patch.object(agent.session_manager, "list_sessions", return_value=infos): + resp = await agent.list_sessions(cursor="s1") + + assert [s.session_id for s in resp.sessions] == ["s2", "s3"] + assert resp.next_cursor is None + + @pytest.mark.asyncio + async def test_list_sessions_unknown_cursor_returns_empty(self, agent): + infos = [ + {"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + {"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0}, + ] + with patch.object(agent.session_manager, "list_sessions", return_value=infos): + resp = await agent.list_sessions(cursor="does-not-exist") + + assert resp.sessions == [] + assert resp.next_cursor is None + # --------------------------------------------------------------------------- # session configuration / model routing # --------------------------------------------------------------------------- From 7fc1e91811b7f5ceab0bdc85e01ca4f77a8555a5 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 06:06:16 -0700 Subject: [PATCH 57/63] security(runtime_provider): close OLLAMA_API_KEY substring-leak sweep miss (#13522) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two call sites still used a raw substring check to identify ollama.com: hermes_cli/runtime_provider.py:496: _is_ollama_url = "ollama.com" in base_url.lower() run_agent.py:6127: if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() ... Same bug class as GHSA-xf8p-v2cg-h7h5 (OpenRouter substring leak), which was fixed in commit dbb7e00e via base_url_host_matches() across the codebase. The earlier sweep missed these two Ollama sites. Self-discovered during April 2026 security-advisory triage; filed as GHSA-76xc-57q6-vm5m. Impact is narrow — requires a user with OLLAMA_API_KEY configured AND a custom base_url whose path or look-alike host contains 'ollama.com'. Users on default provider flows are unaffected. Filed as a draft advisory to use the private-fork flow; not CVE-worthy on its own. Fix is mechanical: replace substring check with base_url_host_matches at both sites. Same helper the rest of the codebase uses. Tests: 67 -> 71 passing. 7 new host-matcher cases in tests/test_base_url_hostname.py (path injection, lookalike host, localtest.me subdomain, ollama.ai TLD confusion, localhost, genuine ollama.com, api.ollama.com subdomain) + 4 call-site tests in tests/hermes_cli/test_runtime_provider_resolution.py verifying OLLAMA_API_KEY is selected only when base_url actually targets ollama.com. Fixes GHSA-76xc-57q6-vm5m --- hermes_cli/runtime_provider.py | 8 +- run_agent.py | 5 +- .../test_runtime_provider_resolution.py | 87 +++++++++++++++++++ tests/test_base_url_hostname.py | 52 +++++++++++ 4 files changed, 148 insertions(+), 4 deletions(-) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index fd28f51368..62f1407cc7 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -492,8 +492,12 @@ def _resolve_openrouter_runtime( else: # Custom endpoint: use api_key from config when using config base_url (#1760). # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's - # the canonical env var for ollama.com authentication. - _is_ollama_url = "ollama.com" in base_url.lower() + # the canonical env var for ollama.com authentication. Match on + # HOST, not substring — a custom base_url whose path contains + # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose + # hostname is a look-alike (ollama.com.attacker.test) must not + # receive the Ollama credential. See GHSA-76xc-57q6-vm5m. + _is_ollama_url = base_url_host_matches(base_url, "ollama.com") api_key_candidates = [ explicit_api_key, (cfg_api_key if use_config_base_url else ""), diff --git a/run_agent.py b/run_agent.py index 26b334a5bf..ba8a2bf4ea 100644 --- a/run_agent.py +++ b/run_agent.py @@ -6123,8 +6123,9 @@ class AIAgent: fb_base_url_hint = (fb.get("base_url") or "").strip() or None fb_api_key_hint = (fb.get("api_key") or "").strip() or None # For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env - # when no explicit key is in the fallback config. - if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint: + # when no explicit key is in the fallback config. Host match + # (not substring) — see GHSA-76xc-57q6-vm5m. + if fb_base_url_hint and base_url_host_matches(fb_base_url_hint, "ollama.com") and not fb_api_key_hint: fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None fb_client, _resolved_fb_model = resolve_provider_client( fb_provider, model=fb_model, raw_codex=True, diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py index c7510a55b8..9d2232f39c 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -1412,3 +1412,90 @@ def test_named_custom_runtime_no_model_when_absent(monkeypatch): resolved = rp.resolve_runtime_provider(requested="my-server") assert "model" not in resolved + + +# --------------------------------------------------------------------------- +# GHSA-76xc-57q6-vm5m — Ollama URL substring leak +# +# Same bug class as the previously-fixed GHSA-xf8p-v2cg-h7h5 (OpenRouter). +# _resolve_openrouter_runtime's custom-endpoint branch selects OLLAMA_API_KEY +# when the base_url "looks like" ollama.com. Previous implementation used +# raw substring match; a custom base_url whose PATH or look-alike host +# merely contained "ollama.com" leaked OLLAMA_API_KEY to that endpoint. +# Fix: use base_url_host_matches (same helper as the OpenRouter sweep). +# --------------------------------------------------------------------------- + +class TestOllamaUrlSubstringLeak: + """Call-site regression tests for the fix in _resolve_openrouter_runtime.""" + + def _make_cfg(self, base_url): + return {"base_url": base_url, "api_key": "", "provider": "custom"} + + def test_ollama_key_not_leaked_to_path_injection(self, monkeypatch): + """http://127.0.0.1:9000/ollama.com/v1 — attacker endpoint with + ollama.com in PATH. Must resolve to OPENAI_API_KEY, not OLLAMA_API_KEY.""" + monkeypatch.setenv("OPENAI_API_KEY", "oa-secret") + monkeypatch.setenv("OPENROUTER_API_KEY", "or-secret") + monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak") + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom") + monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg( + "http://127.0.0.1:9000/ollama.com/v1" + )) + monkeypatch.setattr(rp, "load_pool", lambda provider: None) + monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert "ol-SECRET" not in resolved["api_key"], ( + "OLLAMA_API_KEY must not be sent to an endpoint whose " + "hostname is not ollama.com (GHSA-76xc-57q6-vm5m)" + ) + assert resolved["api_key"] == "oa-secret" + + def test_ollama_key_not_leaked_to_lookalike_host(self, monkeypatch): + """ollama.com.attacker.test — look-alike host. OLLAMA_API_KEY + must not be sent.""" + monkeypatch.setenv("OPENAI_API_KEY", "oa-secret") + monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak") + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom") + monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg( + "http://ollama.com.attacker.test:9000/v1" + )) + monkeypatch.setattr(rp, "load_pool", lambda provider: None) + monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert "ol-SECRET" not in resolved["api_key"] + assert resolved["api_key"] == "oa-secret" + + def test_ollama_key_sent_to_genuine_ollama_com(self, monkeypatch): + """https://ollama.com/v1 — legit Ollama Cloud. OLLAMA_API_KEY + should be used.""" + monkeypatch.setenv("OPENAI_API_KEY", "oa-secret") + monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key") + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom") + monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg( + "https://ollama.com/v1" + )) + monkeypatch.setattr(rp, "load_pool", lambda provider: None) + monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_key"] == "ol-legit-key" + + def test_ollama_key_sent_to_ollama_subdomain(self, monkeypatch): + """https://api.ollama.com/v1 — legit subdomain.""" + monkeypatch.setenv("OPENAI_API_KEY", "oa-secret") + monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key") + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom") + monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg( + "https://api.ollama.com/v1" + )) + monkeypatch.setattr(rp, "load_pool", lambda provider: None) + monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None) + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["api_key"] == "ol-legit-key" diff --git a/tests/test_base_url_hostname.py b/tests/test_base_url_hostname.py index 54aca08c02..cdf8450a25 100644 --- a/tests/test_base_url_hostname.py +++ b/tests/test_base_url_hostname.py @@ -106,3 +106,55 @@ class TestBaseUrlHostMatchesEdgeCases: def test_trailing_dot_on_domain_stripped(self): assert base_url_host_matches("https://openrouter.ai/v1", "openrouter.ai.") is True + + +class TestOllamaUrlHostCheck: + """GHSA-76xc-57q6-vm5m — ollama.com was using a raw substring match for + credential selection (same bug class as GHSA-xf8p-v2cg-h7h5 for OpenRouter). + These tests lock in that the base_url_host_matches fix correctly rejects + the same attack vectors for Ollama. + """ + + def test_ollama_com_path_injection_rejected(self): + """http://evil.test/ollama.com/v1 — ollama.com appears in the path, + not the host. Must not be treated as Ollama Cloud.""" + assert base_url_host_matches( + "http://127.0.0.1:9000/ollama.com/v1", "ollama.com" + ) is False + + def test_ollama_com_subdomain_lookalike_rejected(self): + """ollama.com.attacker.test is a separate host, not ollama.com.""" + assert base_url_host_matches( + "http://ollama.com.attacker.test:9000/v1", "ollama.com" + ) is False + + def test_ollama_com_localtest_me_rejected(self): + """ollama.com.localtest.me resolves to 127.0.0.1 via localtest.me + but its true hostname is localtest.me, not ollama.com.""" + assert base_url_host_matches( + "http://ollama.com.localtest.me:9000/v1", "ollama.com" + ) is False + + def test_ollama_ai_is_not_ollama_com(self): + """Different TLD. ollama.ai is not ollama.com.""" + assert base_url_host_matches( + "https://ollama.ai/v1", "ollama.com" + ) is False + + def test_localhost_ollama_port_is_not_ollama_com(self): + """http://localhost:11434/v1 is a local Ollama install, but its + hostname is localhost, so OLLAMA_API_KEY (an ollama.com-only secret) + must not be sent.""" + assert base_url_host_matches( + "http://localhost:11434/v1", "ollama.com" + ) is False + + def test_genuine_ollama_com_matches(self): + assert base_url_host_matches( + "https://ollama.com/api/generate", "ollama.com" + ) is True + + def test_ollama_com_subdomain_matches(self): + assert base_url_host_matches( + "https://api.ollama.com/v1", "ollama.com" + ) is True From ba4357d13b1f1ae29ebc202ffc557d32e99a04ce Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 06:14:25 -0700 Subject: [PATCH 58/63] fix(env_passthrough): reject Hermes provider credentials from skill passthrough (#13523) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A skill declaring `required_environment_variables: [ANTHROPIC_TOKEN]` in its SKILL.md frontmatter silently bypassed the `execute_code` sandbox's credential-scrubbing guarantee. `register_env_passthrough` had no blocklist, so any name a skill chose flipped `is_env_passthrough(name) => True`, which shortcircuits the sandbox's secret filter. Fix: reject registration when the name appears in `_HERMES_PROVIDER_ENV_BLOCKLIST` (the canonical list of Hermes-managed credentials — provider keys, gateway tokens, etc.). Log a warning naming GHSA-rhgp-j443-p4rf so operators see the rejection in logs. Non-Hermes third-party API keys (TENOR_API_KEY for gif-search, NOTION_TOKEN for notion skills, etc.) remain legitimately registerable — they were never in the sandbox scrub list in the first place. Tests: 16 -> 17 passing. Two old tests that documented the bypass (`test_passthrough_allows_blocklisted_var`, `test_make_run_env_passthrough`) are rewritten to assert the new fail-closed behavior. New `test_non_hermes_api_key_still_registerable` locks in that legitimate third-party keys are unaffected. Reported in GHSA-rhgp-j443-p4rf by @q1uf3ng. Hardening; not CVE-worthy on its own per the decision matrix (attacker must already have operator consent to install a malicious skill). --- tests/tools/test_env_passthrough.py | 60 ++++++++++++++++++++++------- tools/env_passthrough.py | 49 +++++++++++++++++++++-- 2 files changed, 92 insertions(+), 17 deletions(-) diff --git a/tests/tools/test_env_passthrough.py b/tests/tools/test_env_passthrough.py index 6e48ee5c30..eba84bdb2c 100644 --- a/tests/tools/test_env_passthrough.py +++ b/tests/tools/test_env_passthrough.py @@ -172,28 +172,60 @@ class TestTerminalIntegration: assert blocked_var not in result assert "PATH" in result - def test_passthrough_allows_blocklisted_var(self): - from tools.environments.local import _sanitize_subprocess_env, _HERMES_PROVIDER_ENV_BLOCKLIST + def test_passthrough_cannot_override_provider_blocklist(self): + """GHSA-rhgp-j443-p4rf: register_env_passthrough must NOT accept + Hermes provider credentials — that was the bypass where a skill + could declare ANTHROPIC_TOKEN / OPENAI_API_KEY as passthrough and + defeat the execute_code sandbox scrubbing.""" + from tools.environments.local import ( + _sanitize_subprocess_env, + _HERMES_PROVIDER_ENV_BLOCKLIST, + ) blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST)) + # Attempt to register — must be silently refused (logged warning). register_env_passthrough([blocked_var]) + # is_env_passthrough must NOT report it as allowed + assert not is_env_passthrough(blocked_var) + + # Sanitizer still strips the var from subprocess env env = {blocked_var: "secret_value", "PATH": "/usr/bin"} result = _sanitize_subprocess_env(env) - assert blocked_var in result - assert result[blocked_var] == "secret_value" + assert blocked_var not in result + assert "PATH" in result - def test_make_run_env_passthrough(self, monkeypatch): - from tools.environments.local import _make_run_env, _HERMES_PROVIDER_ENV_BLOCKLIST + def test_make_run_env_blocklist_override_rejected(self): + """_make_run_env must NOT expose a blocklisted var to subprocess env + even after a skill attempts to register it via passthrough.""" + import os + from tools.environments.local import ( + _make_run_env, + _HERMES_PROVIDER_ENV_BLOCKLIST, + ) blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST)) - monkeypatch.setenv(blocked_var, "secret_value") + os.environ[blocked_var] = "secret_value" + try: + # Without passthrough — blocked + result_before = _make_run_env({}) + assert blocked_var not in result_before - # Without passthrough — blocked - result_before = _make_run_env({}) - assert blocked_var not in result_before + # Skill tries to register it — must be refused, so still blocked + register_env_passthrough([blocked_var]) + result_after = _make_run_env({}) + assert blocked_var not in result_after + finally: + os.environ.pop(blocked_var, None) - # With passthrough — allowed - register_env_passthrough([blocked_var]) - result_after = _make_run_env({}) - assert blocked_var in result_after + def test_non_hermes_api_key_still_registerable(self): + """Third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT + Hermes provider credentials and must still pass through — skills + that legitimately wrap third-party APIs must keep working.""" + # TENOR_API_KEY is a real example — used by the gif-search skill + register_env_passthrough(["TENOR_API_KEY"]) + assert is_env_passthrough("TENOR_API_KEY") + + # Arbitrary skill-specific var + register_env_passthrough(["MY_SKILL_CUSTOM_CONFIG"]) + assert is_env_passthrough("MY_SKILL_CUSTOM_CONFIG") diff --git a/tools/env_passthrough.py b/tools/env_passthrough.py index b4686cb13f..07bf333a60 100644 --- a/tools/env_passthrough.py +++ b/tools/env_passthrough.py @@ -44,16 +44,59 @@ def _get_allowed() -> set[str]: _config_passthrough: frozenset[str] | None = None +def _is_hermes_provider_credential(name: str) -> bool: + """True if ``name`` is a Hermes-managed provider credential (API key, + token, or similar) per ``_HERMES_PROVIDER_ENV_BLOCKLIST``. + + Skill-declared ``required_environment_variables`` frontmatter must + not be able to override this list — that was the bypass in + GHSA-rhgp-j443-p4rf where a malicious skill registered + ``ANTHROPIC_TOKEN`` / ``OPENAI_API_KEY`` as passthrough and received + the credential in the ``execute_code`` child process, defeating the + sandbox's scrubbing guarantee. + + Non-Hermes API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT + in the blocklist and remain legitimately registerable — skills that + wrap third-party APIs still work. + """ + try: + from tools.environments.local import _HERMES_PROVIDER_ENV_BLOCKLIST + except Exception: + return False + return name in _HERMES_PROVIDER_ENV_BLOCKLIST + + def register_env_passthrough(var_names: Iterable[str]) -> None: """Register environment variable names as allowed in sandboxed environments. Typically called when a skill declares ``required_environment_variables``. + + Variables that are Hermes-managed provider credentials (from + ``_HERMES_PROVIDER_ENV_BLOCKLIST``) are rejected here to preserve + the ``execute_code`` sandbox's credential-scrubbing guarantee per + GHSA-rhgp-j443-p4rf. A skill that needs to talk to a Hermes-managed + provider should do so via the agent's main-process tools (web_search, + web_extract, etc.) where the credential remains safely in the main + process. + + Non-Hermes third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) + pass through normally — they were never in the sandbox scrub list. """ for name in var_names: name = name.strip() - if name: - _get_allowed().add(name) - logger.debug("env passthrough: registered %s", name) + if not name: + continue + if _is_hermes_provider_credential(name): + logger.warning( + "env passthrough: refusing to register Hermes provider " + "credential %r (blocked by _HERMES_PROVIDER_ENV_BLOCKLIST). " + "Skills must not override the execute_code sandbox's " + "credential scrubbing; see GHSA-rhgp-j443-p4rf.", + name, + ) + continue + _get_allowed().add(name) + logger.debug("env passthrough: registered %s", name) def _load_config_passthrough() -> frozenset[str]: From 62348cffbed633e21dc4c75bc9de0d5536161697 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 06:20:40 -0700 Subject: [PATCH 59/63] fix(acp): wire approval callback + make it thread-local (#13525) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two related ACP approval issues: GHSA-96vc-wcxf-jjff — ACP's _run_agent never set HERMES_INTERACTIVE (or any other flag recognized by tools.approval), so check_all_command_guards took the non-interactive auto-approve path and never consulted the ACP-supplied approval callback (conn.request_permission). Dangerous commands executed in ACP sessions without operator approval despite the callback being installed. Fix: set HERMES_INTERACTIVE=1 around the agent run so check_all_command_guards routes through prompt_dangerous_approval(approval_callback=...) — the correct shape for ACP's per-session request_permission call. HERMES_EXEC_ASK would have routed through the gateway-queue path instead, which requires a notify_cb registered in _gateway_notify_cbs (not applicable to ACP). GHSA-qg5c-hvr5-hjgr — _approval_callback and _sudo_password_callback were module-level globals in terminal_tool. Concurrent ACP sessions running in ThreadPoolExecutor threads each installed their own callback into the same slot, racing. Fix: store both callbacks in threading.local() so each thread has its own slot. CLI mode (single thread) is unaffected; gateway mode uses a separate queue-based approval path and was never touched. set_approval_callback is now called INSIDE _run_agent (the executor thread) rather than before dispatching — so the TLS write lands on the correct thread. Tests: 5 new in tests/acp/test_approval_isolation.py covering thread-local isolation of both callbacks and the HERMES_INTERACTIVE callback routing. Existing tests/acp/ (159 tests) and tests/tools/ approval-related tests continue to pass. Fixes GHSA-96vc-wcxf-jjff Fixes GHSA-qg5c-hvr5-hjgr --- acp_adapter/server.py | 37 ++++-- tests/acp/test_approval_isolation.py | 170 +++++++++++++++++++++++++++ tools/terminal_tool.py | 49 ++++++-- 3 files changed, 236 insertions(+), 20 deletions(-) create mode 100644 tests/acp/test_approval_isolation.py diff --git a/acp_adapter/server.py b/acp_adapter/server.py index 1627c22efb..d73c71157a 100644 --- a/acp_adapter/server.py +++ b/acp_adapter/server.py @@ -4,6 +4,7 @@ from __future__ import annotations import asyncio import logging +import os from collections import defaultdict, deque from concurrent.futures import ThreadPoolExecutor from typing import Any, Deque, Optional @@ -554,15 +555,32 @@ class HermesACPAgent(acp.Agent): agent.step_callback = step_cb agent.message_callback = message_cb - if approval_cb: - try: - from tools import terminal_tool as _terminal_tool - previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None) - _terminal_tool.set_approval_callback(approval_cb) - except Exception: - logger.debug("Could not set ACP approval callback", exc_info=True) + # Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr). + # Set it INSIDE _run_agent so the TLS write happens in the executor + # thread — setting it here would write to the event-loop thread's TLS, + # not the executor's. Also set HERMES_INTERACTIVE so approval.py + # takes the CLI-interactive path (which calls the registered + # callback via prompt_dangerous_approval) instead of the + # non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff). + # ACP's conn.request_permission maps cleanly to the interactive + # callback shape — not the gateway-queue HERMES_EXEC_ASK path, + # which requires a notify_cb registered in _gateway_notify_cbs. + previous_approval_cb = None + previous_interactive = None def _run_agent() -> dict: + nonlocal previous_approval_cb, previous_interactive + if approval_cb: + try: + from tools import terminal_tool as _terminal_tool + previous_approval_cb = _terminal_tool._get_approval_callback() + _terminal_tool.set_approval_callback(approval_cb) + except Exception: + logger.debug("Could not set ACP approval callback", exc_info=True) + # Signal to tools.approval that we have an interactive callback + # and the non-interactive auto-approve path must not fire. + previous_interactive = os.environ.get("HERMES_INTERACTIVE") + os.environ["HERMES_INTERACTIVE"] = "1" try: result = agent.run_conversation( user_message=user_text, @@ -574,6 +592,11 @@ class HermesACPAgent(acp.Agent): logger.exception("Agent error in session %s", session_id) return {"final_response": f"Error: {e}", "messages": state.history} finally: + # Restore HERMES_INTERACTIVE. + if previous_interactive is None: + os.environ.pop("HERMES_INTERACTIVE", None) + else: + os.environ["HERMES_INTERACTIVE"] = previous_interactive if approval_cb: try: from tools import terminal_tool as _terminal_tool diff --git a/tests/acp/test_approval_isolation.py b/tests/acp/test_approval_isolation.py new file mode 100644 index 0000000000..90ea4e063e --- /dev/null +++ b/tests/acp/test_approval_isolation.py @@ -0,0 +1,170 @@ +"""Tests for GHSA-96vc-wcxf-jjff and GHSA-qg5c-hvr5-hjgr. + +Two related ACP approval-flow issues: +- 96vc: ACP didn't set HERMES_EXEC_ASK, so `check_all_command_guards` + took the non-interactive auto-approve path and never consulted the + ACP-supplied callback. +- qg5c: `_approval_callback` was a module-global in terminal_tool; + overlapping ACP sessions overwrote each other's callback slot. + +Both fixed together by: +1. Setting HERMES_EXEC_ASK inside _run_agent (wraps the agent call). +2. Storing the callback in thread-local state so concurrent executor + threads don't collide. +""" + +import os +import threading +from unittest.mock import MagicMock + +import pytest + + +class TestThreadLocalApprovalCallback: + """GHSA-qg5c-hvr5-hjgr: set_approval_callback must be per-thread so + concurrent ACP sessions don't stomp on each other's handlers.""" + + def test_set_and_get_in_same_thread(self): + from tools.terminal_tool import ( + set_approval_callback, + _get_approval_callback, + ) + + cb1 = lambda cmd, desc: "once" # noqa: E731 + set_approval_callback(cb1) + assert _get_approval_callback() is cb1 + + def test_callback_not_visible_in_different_thread(self): + """Thread A's callback is NOT visible to Thread B.""" + from tools.terminal_tool import ( + set_approval_callback, + _get_approval_callback, + ) + + cb_a = lambda cmd, desc: "thread_a" # noqa: E731 + cb_b = lambda cmd, desc: "thread_b" # noqa: E731 + + seen_in_a = [] + seen_in_b = [] + + def thread_a(): + set_approval_callback(cb_a) + # Pause so thread B has time to set its own callback + import time + time.sleep(0.05) + seen_in_a.append(_get_approval_callback()) + + def thread_b(): + set_approval_callback(cb_b) + import time + time.sleep(0.05) + seen_in_b.append(_get_approval_callback()) + + ta = threading.Thread(target=thread_a) + tb = threading.Thread(target=thread_b) + ta.start() + tb.start() + ta.join() + tb.join() + + # Each thread must see ONLY its own callback — not the other's + assert seen_in_a == [cb_a] + assert seen_in_b == [cb_b] + + def test_main_thread_callback_not_leaked_to_worker(self): + """A callback set in the main thread does NOT leak into a + freshly-spawned worker thread.""" + from tools.terminal_tool import ( + set_approval_callback, + _get_approval_callback, + ) + + cb_main = lambda cmd, desc: "main" # noqa: E731 + set_approval_callback(cb_main) + + worker_saw = [] + + def worker(): + worker_saw.append(_get_approval_callback()) + + t = threading.Thread(target=worker) + t.start() + t.join() + + # Worker thread has no callback set — TLS is empty for it + assert worker_saw == [None] + # Main thread still has its callback + assert _get_approval_callback() is cb_main + + def test_sudo_password_callback_also_thread_local(self): + """Same protection applies to the sudo password callback.""" + from tools.terminal_tool import ( + set_sudo_password_callback, + _get_sudo_password_callback, + ) + + cb_main = lambda: "main-password" # noqa: E731 + set_sudo_password_callback(cb_main) + + worker_saw = [] + + def worker(): + worker_saw.append(_get_sudo_password_callback()) + + t = threading.Thread(target=worker) + t.start() + t.join() + + assert worker_saw == [None] + assert _get_sudo_password_callback() is cb_main + + +class TestAcpExecAskGate: + """GHSA-96vc-wcxf-jjff: ACP's _run_agent must set HERMES_INTERACTIVE so + that tools.approval.check_all_command_guards takes the CLI-interactive + path (consults the registered callback via prompt_dangerous_approval) + instead of the non-interactive auto-approve shortcut. + + (HERMES_EXEC_ASK takes the gateway-queue path which requires a + notify_cb registered in _gateway_notify_cbs — not applicable to ACP, + which uses a direct callback shape.)""" + + def test_interactive_env_var_routes_to_callback(self, monkeypatch): + """When HERMES_INTERACTIVE is set and an approval callback is + registered, a dangerous command must route through the callback.""" + # Clean env + monkeypatch.delenv("HERMES_INTERACTIVE", raising=False) + monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False) + monkeypatch.delenv("HERMES_EXEC_ASK", raising=False) + monkeypatch.delenv("HERMES_YOLO_MODE", raising=False) + + from tools.approval import check_all_command_guards + + called_with = [] + + def fake_cb(command, description, *, allow_permanent=True): + called_with.append((command, description)) + return "once" + + # Without HERMES_INTERACTIVE: takes auto-approve path, callback NOT called + result = check_all_command_guards( + "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb, + ) + assert result["approved"] is True + assert called_with == [], ( + "without HERMES_INTERACTIVE the non-interactive auto-approve " + "path should fire without consulting the callback" + ) + + # With HERMES_INTERACTIVE: callback IS called, approval flows through it + monkeypatch.setenv("HERMES_INTERACTIVE", "1") + called_with.clear() + result = check_all_command_guards( + "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb, + ) + assert called_with, ( + "with HERMES_INTERACTIVE the approval path should consult the " + "registered callback — this was the ACP bypass in " + "GHSA-96vc-wcxf-jjff" + ) + assert result["approved"] is True diff --git a/tools/terminal_tool.py b/tools/terminal_tool.py index 7a7dc9c1a6..4a2a5fc0be 100644 --- a/tools/terminal_tool.py +++ b/tools/terminal_tool.py @@ -114,22 +114,44 @@ _cached_sudo_password: str = "" # Optional UI callbacks for interactive prompts. When set, these are called # instead of the default /dev/tty or input() readers. The CLI registers these # so prompts route through prompt_toolkit's event loop. -# _sudo_password_callback() -> str (return password or "" to skip) -# _approval_callback(command, description) -> str ("once"/"session"/"always"/"deny") -_sudo_password_callback = None -_approval_callback = None +# Callback slots used by the approval prompt and sudo password prompt +# routines. Stored in thread-local state so overlapping ACP sessions — +# each running in its own ThreadPoolExecutor thread — don't stomp on +# each other's callbacks. See GHSA-qg5c-hvr5-hjgr. +# +# CLI mode is single-threaded, so each thread (the only one) holds its +# own callback exactly like before. Gateway mode resolves approvals via +# the per-session queue in tools.approval, not through these callbacks, +# so it's unaffected. +import threading +_callback_tls = threading.local() + + +def _get_sudo_password_callback(): + return getattr(_callback_tls, "sudo_password", None) + + +def _get_approval_callback(): + return getattr(_callback_tls, "approval", None) def set_sudo_password_callback(cb): - """Register a callback for sudo password prompts (used by CLI).""" - global _sudo_password_callback - _sudo_password_callback = cb + """Register a callback for sudo password prompts (used by CLI). + + Per-thread scope — ACP sessions that run concurrently in a + ThreadPoolExecutor each have their own callback slot. + """ + _callback_tls.sudo_password = cb def set_approval_callback(cb): - """Register a callback for dangerous command approval prompts (used by CLI).""" - global _approval_callback - _approval_callback = cb + """Register a callback for dangerous command approval prompts. + + Per-thread scope — ACP sessions that run concurrently in a + ThreadPoolExecutor each have their own callback slot. See + GHSA-qg5c-hvr5-hjgr. + """ + _callback_tls.approval = cb # ============================================================================= # Dangerous Command Approval System @@ -144,7 +166,7 @@ from tools.approval import ( def _check_all_guards(command: str, env_type: str) -> dict: """Delegate to consolidated guard (tirith + dangerous cmd) with CLI callback.""" return _check_all_guards_impl(command, env_type, - approval_callback=_approval_callback) + approval_callback=_get_approval_callback()) # Allowlist: characters that can legitimately appear in directory paths. @@ -219,9 +241,10 @@ def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str: import sys # Use the registered callback when available (prompt_toolkit-compatible) - if _sudo_password_callback is not None: + _sudo_cb = _get_sudo_password_callback() + if _sudo_cb is not None: try: - return _sudo_password_callback() or "" + return _sudo_cb() or "" except Exception: return "" From 16accd44bdc5151aee8cac57e74fd3da15da3092 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 06:23:09 -0700 Subject: [PATCH 60/63] fix(telegram): require TELEGRAM_WEBHOOK_SECRET in webhook mode (#13527) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When TELEGRAM_WEBHOOK_URL was set but TELEGRAM_WEBHOOK_SECRET was not, python-telegram-bot received secret_token=None and the webhook endpoint accepted any HTTP POST. Anyone who could reach the listener could inject forged updates — spoofed user IDs, spoofed chat IDs, attacker-controlled message text — and trigger handlers as if Telegram delivered them. The fix refuses to start the adapter in webhook mode without the secret. Polling mode (default, no webhook URL) is unaffected — polling is authenticated by the bot token directly. BREAKING CHANGE for webhook-mode deployments that never set TELEGRAM_WEBHOOK_SECRET. The error message explains remediation: export TELEGRAM_WEBHOOK_SECRET="$(openssl rand -hex 32)" and instructs registering it with Telegram via setWebhook's secret_token parameter. Release notes must call this out. Reported in GHSA-3vpc-7q5r-276h by @bupt-Yy-young. Hardening — not CVE per SECURITY.md §3 "Public Exposure: Deploying the gateway to the public internet without external authentication or network protection" covers the historical default, but shipping a fail-open webhook as the default was the wrong choice and the guard aligns us with the SECURITY.md threat model. --- gateway/platforms/telegram.py | 22 +++- tests/gateway/test_telegram_webhook_secret.py | 100 ++++++++++++++++++ 2 files changed, 121 insertions(+), 1 deletion(-) create mode 100644 tests/gateway/test_telegram_webhook_secret.py diff --git a/gateway/platforms/telegram.py b/gateway/platforms/telegram.py index 156251e54c..bec0d690a3 100644 --- a/gateway/platforms/telegram.py +++ b/gateway/platforms/telegram.py @@ -794,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter): # Telegram pushes updates to our HTTP endpoint. This # enables cloud platforms (Fly.io, Railway) to auto-wake # suspended machines on inbound HTTP traffic. + # + # SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it, + # python-telegram-bot passes secret_token=None and the + # webhook endpoint accepts any HTTP POST — attackers can + # inject forged updates as if from Telegram. Refuse to + # start rather than silently run in fail-open mode. + # See GHSA-3vpc-7q5r-276h. webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443")) - webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None + webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() + if not webhook_secret: + raise RuntimeError( + "TELEGRAM_WEBHOOK_SECRET is required when " + "TELEGRAM_WEBHOOK_URL is set. Without it, the " + "webhook endpoint accepts forged updates from " + "anyone who can reach it — see " + "https://github.com/NousResearch/hermes-agent/" + "security/advisories/GHSA-3vpc-7q5r-276h.\n\n" + "Generate a secret and set it in your .env:\n" + " export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n" + "Then register it with Telegram when setting the " + "webhook via setWebhook's secret_token parameter." + ) from urllib.parse import urlparse webhook_path = urlparse(webhook_url).path or "/telegram" diff --git a/tests/gateway/test_telegram_webhook_secret.py b/tests/gateway/test_telegram_webhook_secret.py new file mode 100644 index 0000000000..0f1e786367 --- /dev/null +++ b/tests/gateway/test_telegram_webhook_secret.py @@ -0,0 +1,100 @@ +"""Tests for GHSA-3vpc-7q5r-276h — Telegram webhook secret required. + +Previously, when TELEGRAM_WEBHOOK_URL was set but TELEGRAM_WEBHOOK_SECRET +was not, python-telegram-bot received secret_token=None and the webhook +endpoint accepted any HTTP POST. + +The fix refuses to start the adapter in webhook mode without the secret. +""" + +from __future__ import annotations + +import re +import sys +from pathlib import Path + +import pytest + +_repo = str(Path(__file__).resolve().parents[2]) +if _repo not in sys.path: + sys.path.insert(0, _repo) + + +class TestTelegramWebhookSecretRequired: + """Direct source-level check of the webhook-secret guard. + + The guard is embedded in TelegramAdapter.connect() and hard to isolate + via mocks (requires a full python-telegram-bot ApplicationBuilder + chain). These tests exercise it via source inspection — verifying the + check exists, raises RuntimeError with the advisory link, and only + fires in webhook mode. End-to-end validation is covered by CI + + manual deployment tests. + """ + + def _get_source(self) -> str: + path = Path(_repo) / "gateway" / "platforms" / "telegram.py" + return path.read_text(encoding="utf-8") + + def test_webhook_branch_checks_secret(self): + """The webhook-mode branch of connect() must read + TELEGRAM_WEBHOOK_SECRET and refuse when empty.""" + src = self._get_source() + # The guard must appear after TELEGRAM_WEBHOOK_URL is set + assert re.search( + r'TELEGRAM_WEBHOOK_SECRET.*?\.strip\(\)\s*\n\s*if not webhook_secret:', + src, re.DOTALL, + ), ( + "TelegramAdapter.connect() must strip TELEGRAM_WEBHOOK_SECRET " + "and raise when the secret is empty — see GHSA-3vpc-7q5r-276h" + ) + + def test_guard_raises_runtime_error(self): + """The guard raises RuntimeError (not a silent log) so operators + see the failure at startup.""" + src = self._get_source() + # Between the "if not webhook_secret:" line and the next blank + # line block, we should see a RuntimeError being raised + guard_match = re.search( + r'if not webhook_secret:\s*\n\s*raise\s+RuntimeError\(', + src, + ) + assert guard_match, ( + "Missing webhook secret must raise RuntimeError — silent " + "fall-through was the original GHSA-3vpc-7q5r-276h bypass" + ) + + def test_guard_message_includes_advisory_link(self): + """The RuntimeError message should reference the advisory so + operators can read the full context.""" + src = self._get_source() + assert "GHSA-3vpc-7q5r-276h" in src, ( + "Guard error message must cite the advisory for operator context" + ) + + def test_guard_message_explains_remediation(self): + """The error should tell the operator how to fix it.""" + src = self._get_source() + # Should mention how to generate a secret + assert "openssl rand" in src or "TELEGRAM_WEBHOOK_SECRET=" in src, ( + "Guard error message should show operators how to set " + "TELEGRAM_WEBHOOK_SECRET" + ) + + def test_polling_branch_has_no_secret_guard(self): + """Polling mode (else-branch) must NOT require the webhook secret — + polling authenticates via the bot token, not a webhook secret.""" + src = self._get_source() + # The guard should appear inside the `if webhook_url:` branch, + # not the `else:` polling branch. Rough check: the raise is + # followed (within ~60 lines) by an `else:` that starts the + # polling branch, and there's no secret-check in that polling + # branch. + webhook_block = re.search( + r'if webhook_url:\s*\n(.*?)\n else:\s*\n(.*?)\n', + src, re.DOTALL, + ) + if webhook_block: + webhook_body = webhook_block.group(1) + polling_body = webhook_block.group(2) + assert "TELEGRAM_WEBHOOK_SECRET" in webhook_body + assert "TELEGRAM_WEBHOOK_SECRET" not in polling_body From 244ae6db15f3fc0b18038d3de73473bdd16dd43b Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 06:26:35 -0700 Subject: [PATCH 61/63] fix(web_server,whatsapp-bridge): validate Host header against bound interface (#13530) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DNS rebinding attack: a victim browser that has the dashboard (or the WhatsApp bridge) open could be tricked into fetching from an attacker-controlled hostname that TTL-flips to 127.0.0.1. Same-origin and CORS checks don't help — the browser now treats the attacker origin as same-origin with the local service. Validating the Host header at the app layer rejects any request whose Host isn't one we bound for. Changes: hermes_cli/web_server.py: - New host_header_middleware runs before auth_middleware. Reads app.state.bound_host (set by start_server) and rejects requests whose Host header doesn't match the bound interface with HTTP 400. - Loopback binds accept localhost / 127.0.0.1 / ::1. Non-loopback binds require exact match. 0.0.0.0 binds skip the check (explicit --insecure opt-in; no app-layer defence possible). - IPv6 bracket notation parsed correctly: [::1] and [::1]:9119 both accepted. scripts/whatsapp-bridge/bridge.js: - Express middleware rejects non-loopback Host headers. Bridge already binds 127.0.0.1-only, this adds the complementary app-layer check for DNS rebinding defence. Tests: 8 new in tests/hermes_cli/test_web_server_host_header.py covering loopback/non-loopback/zero-zero binds, IPv6 brackets, case insensitivity, and end-to-end middleware rejection via TestClient. Reported in GHSA-ppp5-vxwm-4cf7 by @bupt-Yy-young. Hardening — not CVE per SECURITY.md §3. The dashboard's main trust boundary is the loopback bind + session token; DNS rebinding defeats the bind assumption but not the token (since the rebinding browser still sees a first-party fetch to 127.0.0.1 with the token-gated API). Host-header validation adds the missing belt-and-braces layer. --- hermes_cli/web_server.py | 89 +++++++++++ scripts/whatsapp-bridge/bridge.js | 31 ++++ .../hermes_cli/test_web_server_host_header.py | 148 ++++++++++++++++++ 3 files changed, 268 insertions(+) create mode 100644 tests/hermes_cli/test_web_server_host_header.py diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index fe6b979e44..6cf1199253 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -114,6 +114,91 @@ def _require_token(request: Request) -> None: raise HTTPException(status_code=401, detail="Unauthorized") +# Accepted Host header values for loopback binds. DNS rebinding attacks +# point a victim browser at an attacker-controlled hostname (evil.test) +# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin +# checks because the browser now considers evil.test and our dashboard +# "same origin". Validating the Host header at the app layer rejects any +# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7. +_LOOPBACK_HOST_VALUES: frozenset = frozenset({ + "localhost", "127.0.0.1", "::1", +}) + + +def _is_accepted_host(host_header: str, bound_host: str) -> bool: + """True if the Host header targets the interface we bound to. + + Accepts: + - Exact bound host (with or without port suffix) + - Loopback aliases when bound to loopback + - Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback, + no protection possible at this layer) + """ + if not host_header: + return False + # Strip port suffix. IPv6 addresses use bracket notation: + # [::1] — no port + # [::1]:9119 — with port + # Plain hosts/v4: + # localhost:9119 + # 127.0.0.1:9119 + h = host_header.strip() + if h.startswith("["): + # IPv6 bracketed — port (if any) follows "]:" + close = h.find("]") + if close != -1: + host_only = h[1:close] # strip brackets + else: + host_only = h.strip("[]") + else: + host_only = h.rsplit(":", 1)[0] if ":" in h else h + host_only = host_only.lower() + + # 0.0.0.0 bind means operator explicitly opted into all-interfaces + # (requires --insecure per web_server.start_server). No Host-layer + # defence can protect that mode; rely on operator network controls. + if bound_host in ("0.0.0.0", "::"): + return True + + # Loopback bind: accept the loopback names + bound_lc = bound_host.lower() + if bound_lc in _LOOPBACK_HOST_VALUES: + return host_only in _LOOPBACK_HOST_VALUES + + # Explicit non-loopback bind: require exact host match + return host_only == bound_lc + + +@app.middleware("http") +async def host_header_middleware(request: Request, call_next): + """Reject requests whose Host header doesn't match the bound interface. + + Defends against DNS rebinding: a victim browser on a localhost + dashboard is tricked into fetching from an attacker hostname that + TTL-flips to 127.0.0.1. CORS and same-origin checks don't help — + the browser now treats the attacker origin as same-origin with the + dashboard. Host-header validation at the app layer catches it. + + See GHSA-ppp5-vxwm-4cf7. + """ + # Store the bound host on app.state so this middleware can read it — + # set by start_server() at listen time. + bound_host = getattr(app.state, "bound_host", None) + if bound_host: + host_header = request.headers.get("host", "") + if not _is_accepted_host(host_header, bound_host): + return JSONResponse( + status_code=400, + content={ + "detail": ( + "Invalid Host header. Dashboard requests must use " + "the hostname the server was bound to." + ), + }, + ) + return await call_next(request) + + @app.middleware("http") async def auth_middleware(request: Request, call_next): """Require the session token on all /api/ routes except the public list.""" @@ -2323,6 +2408,10 @@ def start_server( "authentication. Only use on trusted networks.", host, ) + # Record the bound host so host_header_middleware can validate incoming + # Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7). + app.state.bound_host = host + if open_browser: import webbrowser diff --git a/scripts/whatsapp-bridge/bridge.js b/scripts/whatsapp-bridge/bridge.js index 401651c8a8..d1aeb73722 100644 --- a/scripts/whatsapp-bridge/bridge.js +++ b/scripts/whatsapp-bridge/bridge.js @@ -372,6 +372,37 @@ async function startSocket() { const app = express(); app.use(express.json()); +// Host-header validation — defends against DNS rebinding. +// The bridge binds loopback-only (127.0.0.1) but a victim browser on +// the same machine could be tricked into fetching from an attacker +// hostname that TTL-flips to 127.0.0.1. Reject any request whose Host +// header doesn't resolve to a loopback alias. +// See GHSA-ppp5-vxwm-4cf7. +const _ACCEPTED_HOST_VALUES = new Set([ + 'localhost', + '127.0.0.1', + '[::1]', + '::1', +]); + +app.use((req, res, next) => { + const raw = (req.headers.host || '').trim(); + if (!raw) { + return res.status(400).json({ error: 'Missing Host header' }); + } + // Strip port suffix: "localhost:3000" → "localhost" + const hostOnly = (raw.includes(':') + ? raw.substring(0, raw.lastIndexOf(':')) + : raw + ).replace(/^\[|\]$/g, '').toLowerCase(); + if (!_ACCEPTED_HOST_VALUES.has(hostOnly)) { + return res.status(400).json({ + error: 'Invalid Host header. Bridge accepts loopback hosts only.', + }); + } + next(); +}); + // Poll for new messages (long-poll style) app.get('/messages', (req, res) => { const msgs = messageQueue.splice(0, messageQueue.length); diff --git a/tests/hermes_cli/test_web_server_host_header.py b/tests/hermes_cli/test_web_server_host_header.py new file mode 100644 index 0000000000..966127b05c --- /dev/null +++ b/tests/hermes_cli/test_web_server_host_header.py @@ -0,0 +1,148 @@ +"""Tests for GHSA-ppp5-vxwm-4cf7 — Host-header validation. + +DNS rebinding defence: a victim browser that has the dashboard open +could be tricked into fetching from an attacker-controlled hostname +that TTL-flips to 127.0.0.1. Same-origin / CORS checks won't help — +the browser now treats the attacker origin as same-origin. Validating +the Host header at the application layer rejects the attack. +""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +_repo = str(Path(__file__).resolve().parents[1]) +if _repo not in sys.path: + sys.path.insert(0, _repo) + + +class TestHostHeaderValidator: + """Unit test the _is_accepted_host helper directly — cheaper and + more thorough than spinning up the full FastAPI app.""" + + def test_loopback_bind_accepts_loopback_names(self): + from hermes_cli.web_server import _is_accepted_host + + for bound in ("127.0.0.1", "localhost", "::1"): + for host_header in ( + "127.0.0.1", "127.0.0.1:9119", + "localhost", "localhost:9119", + "[::1]", "[::1]:9119", + ): + assert _is_accepted_host(host_header, bound), ( + f"bound={bound} must accept host={host_header}" + ) + + def test_loopback_bind_rejects_attacker_hostnames(self): + """The core rebinding defence: attacker-controlled hosts that + TTL-flip to 127.0.0.1 must be rejected.""" + from hermes_cli.web_server import _is_accepted_host + + for bound in ("127.0.0.1", "localhost"): + for attacker in ( + "evil.example", + "evil.example:9119", + "rebind.attacker.test:80", + "localhost.attacker.test", # subdomain trick + "127.0.0.1.evil.test", # lookalike IP prefix + "", # missing Host + ): + assert not _is_accepted_host(attacker, bound), ( + f"bound={bound} must reject attacker host={attacker!r}" + ) + + def test_zero_zero_bind_accepts_anything(self): + """0.0.0.0 means operator explicitly opted into all-interfaces + (requires --insecure). No Host-layer defence is possible — rely + on operator network controls.""" + from hermes_cli.web_server import _is_accepted_host + + for host in ("10.0.0.5", "evil.example", "my-server.corp.net"): + assert _is_accepted_host(host, "0.0.0.0") + assert _is_accepted_host(host + ":9119", "0.0.0.0") + + def test_explicit_non_loopback_bind_requires_exact_match(self): + """If the operator bound to a specific non-loopback hostname, + the Host header must match exactly.""" + from hermes_cli.web_server import _is_accepted_host + + assert _is_accepted_host("my-server.corp.net", "my-server.corp.net") + assert _is_accepted_host("my-server.corp.net:9119", "my-server.corp.net") + # Different host — reject + assert not _is_accepted_host("evil.example", "my-server.corp.net") + # Loopback — reject (we bound to a specific non-loopback name) + assert not _is_accepted_host("localhost", "my-server.corp.net") + + def test_case_insensitive_comparison(self): + """Host headers are case-insensitive per RFC — accept variations.""" + from hermes_cli.web_server import _is_accepted_host + + assert _is_accepted_host("LOCALHOST", "127.0.0.1") + assert _is_accepted_host("LocalHost:9119", "127.0.0.1") + + +class TestHostHeaderMiddleware: + """End-to-end test via the FastAPI app — verify the middleware + rejects bad Host headers with 400.""" + + def test_rebinding_request_rejected(self): + from fastapi.testclient import TestClient + from hermes_cli.web_server import app + + # Simulate start_server having set the bound_host + app.state.bound_host = "127.0.0.1" + try: + client = TestClient(app) + # The TestClient sends Host: testserver by default — which is + # NOT a loopback alias, so the middleware must reject it. + resp = client.get( + "/api/status", + headers={"Host": "evil.example"}, + ) + assert resp.status_code == 400 + assert "Invalid Host header" in resp.json()["detail"] + finally: + # Clean up so other tests don't inherit the bound_host + if hasattr(app.state, "bound_host"): + del app.state.bound_host + + def test_legit_loopback_request_accepted(self): + from fastapi.testclient import TestClient + from hermes_cli.web_server import app + + app.state.bound_host = "127.0.0.1" + try: + client = TestClient(app) + # /api/status is in _PUBLIC_API_PATHS — passes auth — so the + # only thing that can reject is the host header middleware + resp = client.get( + "/api/status", + headers={"Host": "localhost:9119"}, + ) + # Either 200 (endpoint served) or some other non-400 — + # just not the host-rejection 400 + assert resp.status_code != 400 or ( + "Invalid Host header" not in resp.json().get("detail", "") + ) + finally: + if hasattr(app.state, "bound_host"): + del app.state.bound_host + + def test_no_bound_host_skips_validation(self): + """If app.state.bound_host isn't set (e.g. running under test + infra without calling start_server), middleware must pass through + rather than crash.""" + from fastapi.testclient import TestClient + from hermes_cli.web_server import app + + # Make sure bound_host isn't set + if hasattr(app.state, "bound_host"): + del app.state.bound_host + + client = TestClient(app) + resp = client.get("/api/status") + # Should get through to the status endpoint, not a 400 + assert resp.status_code != 400 From 5e0eed470fe8c4d8a4e6bd1f66365acf2a1f3e5d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 06:40:58 -0700 Subject: [PATCH 62/63] fix(cache): enable prompt caching for Qwen on OpenCode/OpenCode-Go/Alibaba (#13528) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Qwen models on OpenCode, OpenCode Go, and direct DashScope accept Anthropic-style cache_control markers on OpenAI-wire chat completions, but hermes only injected markers for Claude-named models. Result: zero cache hits on every turn, full prompt re-billed — a community user reported burning through their OpenCode Go subscription on Qwen3.6. Extend _anthropic_prompt_cache_policy to return (True, False) — envelope layout, not native — for the Alibaba provider family when the model name contains 'qwen'. Envelope layout places markers on inner content blocks (matching pi-mono's 'alibaba' cacheControlFormat) and correctly skips top-level markers on tool-role messages (which OpenCode rejects). Non-Qwen models on these providers (GLM, Kimi) keep their existing behaviour — they have automatic server-side caching and don't need client markers. Upstream reference: pi-mono #3392 / #3393 documented this contract for opencode-go Qwen models. Adds 7 regression tests covering Qwen3.5/3.6/coder on each affected provider plus negative cases for GLM/Kimi/OpenRouter-Qwen. --- run_agent.py | 27 ++++++- .../test_anthropic_prompt_cache_policy.py | 80 +++++++++++++++++++ 2 files changed, 106 insertions(+), 1 deletion(-) diff --git a/run_agent.py b/run_agent.py index ba8a2bf4ea..fadf28b314 100644 --- a/run_agent.py +++ b/run_agent.py @@ -2355,6 +2355,13 @@ class AIAgent: cost reduction as direct Anthropic callers, provided their gateway implements the Anthropic cache_control contract (MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do). + + Qwen / Alibaba-family models on OpenCode, OpenCode Go, and direct + Alibaba (DashScope) also honour Anthropic-style ``cache_control`` + markers on OpenAI-wire chat completions. Upstream pi-mono #3392 / + pi #3393 documented this for opencode-go Qwen. Without markers + these providers serve zero cache hits, re-billing the full prompt + on every turn. """ eff_provider = (provider if provider is not None else self.provider) or "" eff_base_url = base_url if base_url is not None else (self.base_url or "") @@ -2362,7 +2369,9 @@ class AIAgent: eff_model = (model if model is not None else self.model) or "" base_lower = eff_base_url.lower() - is_claude = "claude" in eff_model.lower() + model_lower = eff_model.lower() + provider_lower = eff_provider.lower() + is_claude = "claude" in model_lower is_openrouter = base_url_host_matches(eff_base_url, "openrouter.ai") is_anthropic_wire = eff_api_mode == "anthropic_messages" is_native_anthropic = ( @@ -2377,6 +2386,22 @@ class AIAgent: if is_anthropic_wire and is_claude: # Third-party Anthropic-compatible gateway. return True, True + + # Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire + # transport that accepts Anthropic-style cache_control markers and + # rewards them with real cache hits. Without this branch + # qwen3.6-plus on opencode-go reports 0% cached tokens and burns + # through the subscription on every turn. + model_is_qwen = "qwen" in model_lower + provider_is_alibaba_family = provider_lower in { + "opencode", "opencode-zen", "opencode-go", "alibaba", + } + if provider_is_alibaba_family and model_is_qwen: + # Envelope layout (native_anthropic=False): markers on inner + # content parts, not top-level tool messages. Matches + # pi-mono's "alibaba" cacheControlFormat. + return True, False + return False, False @staticmethod diff --git a/tests/run_agent/test_anthropic_prompt_cache_policy.py b/tests/run_agent/test_anthropic_prompt_cache_policy.py index 7d5a166544..7a85022a5c 100644 --- a/tests/run_agent/test_anthropic_prompt_cache_policy.py +++ b/tests/run_agent/test_anthropic_prompt_cache_policy.py @@ -118,6 +118,86 @@ class TestOpenAIWireFormatOnCustomProvider: assert agent._anthropic_prompt_cache_policy() == (False, False) +class TestQwenAlibabaFamily: + """Qwen on OpenCode/OpenCode-Go/Alibaba — needs cache_control even on OpenAI-wire. + + Upstream pi-mono #3392 / #3393 documented that these providers serve + zero cache hits without Anthropic-style markers. Regression reported + by community user (Qwen3.6 on opencode-go burning through + subscription with no cache). Envelope layout, not native, because the + wire format is OpenAI chat.completions. + """ + + def test_qwen_on_opencode_go_caches_with_envelope_layout(self): + agent = _make_agent( + provider="opencode-go", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="qwen3.6-plus", + ) + should, native = agent._anthropic_prompt_cache_policy() + assert should is True, "Qwen on opencode-go must cache" + assert native is False, "opencode-go is OpenAI-wire; envelope layout" + + def test_qwen35_plus_on_opencode_go(self): + agent = _make_agent( + provider="opencode-go", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="qwen3.5-plus", + ) + assert agent._anthropic_prompt_cache_policy() == (True, False) + + def test_qwen_on_opencode_zen_caches(self): + agent = _make_agent( + provider="opencode", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="qwen3-coder-plus", + ) + assert agent._anthropic_prompt_cache_policy() == (True, False) + + def test_qwen_on_direct_alibaba_caches(self): + agent = _make_agent( + provider="alibaba", + base_url="https://dashscope.aliyuncs.com/compatible-mode/v1", + api_mode="chat_completions", + model="qwen3-coder", + ) + assert agent._anthropic_prompt_cache_policy() == (True, False) + + def test_non_qwen_on_opencode_go_does_not_cache(self): + # GLM / Kimi on opencode-go don't need markers (they have automatic + # server-side caching or none at all). + agent = _make_agent( + provider="opencode-go", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="glm-5", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + def test_kimi_on_opencode_go_does_not_cache(self): + agent = _make_agent( + provider="opencode-go", + base_url="https://opencode.ai/v1", + api_mode="chat_completions", + model="kimi-k2.5", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + def test_qwen_on_openrouter_not_affected(self): + # Qwen via OpenRouter falls through — OpenRouter has its own + # upstream caching arrangement for Qwen (provider-dependent). + agent = _make_agent( + provider="openrouter", + base_url="https://openrouter.ai/api/v1", + api_mode="chat_completions", + model="qwen/qwen3-coder", + ) + assert agent._anthropic_prompt_cache_policy() == (False, False) + + class TestExplicitOverrides: """Policy accepts keyword overrides for switch_model / fallback activation.""" From 432772dbdf63eae379b76b3811c51284c36bb817 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 21 Apr 2026 06:42:32 -0700 Subject: [PATCH 63/63] fix(cache): surface cache-hit telemetry for all providers, not just Anthropic-wire (#13543) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 💾 Cache footer was gated on `self._use_prompt_caching`, which is only True for Anthropic marker injection (native Anthropic, OpenRouter Claude, Anthropic-wire gateways, Qwen on OpenCode/Alibaba). Providers with automatic server-side prefix caching — OpenAI, Kimi, DeepSeek, Qwen on OpenRouter — return `prompt_tokens_details.cached_tokens` too, but users couldn't see their cache % because the display path never fired for them. Result: people couldn't tell their cache was working or broken without grepping agent.log. `canonical_usage` from `normalize_usage()` already unifies all three API shapes (Anthropic / Codex Responses / OpenAI chat completions) into `cache_read_tokens` and `cache_write_tokens`. Drop the gate and read from there — now the footer fires whenever the provider reported any cached or written tokens, regardless of whether hermes injected markers. Also removes duplicated branch-per-API-shape extraction code. --- run_agent.py | 35 ++++++++++++++++++++--------------- 1 file changed, 20 insertions(+), 15 deletions(-) diff --git a/run_agent.py b/run_agent.py index fadf28b314..c5881b87f6 100644 --- a/run_agent.py +++ b/run_agent.py @@ -9907,22 +9907,27 @@ class AIAgent: if self.verbose_logging: logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}") - # Log cache hit stats when prompt caching is active - if self._use_prompt_caching: - if self.api_mode == "anthropic_messages": - _tcs = self._get_anthropic_transport() - _cache = _tcs.extract_cache_stats(response) - cached = _cache["cached_tokens"] if _cache else 0 - written = _cache["creation_tokens"] if _cache else 0 - else: - # OpenRouter uses prompt_tokens_details.cached_tokens - details = getattr(response.usage, 'prompt_tokens_details', None) - cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0 - written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0 - prompt = usage_dict["prompt_tokens"] + # Surface cache hit stats for any provider that reports + # them — not just those where we inject cache_control + # markers. OpenAI/Kimi/DeepSeek/Qwen all do automatic + # server-side prefix caching and return + # ``prompt_tokens_details.cached_tokens``; users + # previously could not see their cache % because this + # line was gated on ``_use_prompt_caching``, which is + # only True for Anthropic-style marker injection. + # ``canonical_usage`` is already normalised from all + # three API shapes (Anthropic / Codex / OpenAI-chat) + # so we can rely on its values directly. + cached = canonical_usage.cache_read_tokens + written = canonical_usage.cache_write_tokens + prompt = usage_dict["prompt_tokens"] + if (cached or written) and not self.quiet_mode: hit_pct = (cached / prompt * 100) if prompt > 0 else 0 - if not self.quiet_mode: - self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)") + self._vprint( + f"{self.log_prefix} 💾 Cache: " + f"{cached:,}/{prompt:,} tokens " + f"({hit_pct:.0f}% hit, {written:,} written)" + ) has_retried_429 = False # Reset on success # Clear Nous rate limit state on successful request —