From 0dd26c9495e312a5f64b58d6d41d92e93610a22d Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:50:24 -0700 Subject: [PATCH 01/69] fix(tests): fix 78 CI test failures and remove dead test (#9036) Production fixes: - voice_mode.py: add is_recording property to AudioRecorder (parity with TermuxAudioRecorder) - cronjob_tools.py: add sms example to deliver description Test fixes: - test_real_interrupt_subagent: add missing _execution_thread_id (fixes 19 cascading failures from leaked _build_system_prompt patch) - test_anthropic_error_handling: add _FakeMessages, override _interruptible_streaming_api_call (6 fixes) - test_ctx_halving_fix: add missing request_overrides attribute (4 fixes) - test_context_token_tracking: set _disable_streaming=True for non-streaming test path (4 fixes) - test_dict_tool_call_args: set _disable_streaming=True (1 fix) - test_provider_parity: add model='gpt-4o' for AIGateway tests to meet 64K minimum context (4 fixes) - test_session_race_guard: add user_id to SessionSource (5 fixes) - test_restart_drain/helpers: add user_id to SessionSource (2 fixes) - test_telegram_photo_interrupts: add user_id to SessionSource - test_interrupt: target thread_id for per-thread interrupt system (2 fixes) - test_zombie_process_cleanup: rewrite with object.__new__ for refactored GatewayRunner.stop() (1 fix) - test_browser_camofox_state: update config version 15->17 (1 fix) - test_trajectory_compressor_async: widen lookback window 10->20 for line-shifted AsyncOpenAI (1 fix) - test_voice_mode: fixed by production is_recording addition (5 fixes) - test_voice_cli_integration: add _attached_images to CLI stub (2 fixes) - test_hermes_logging: explicit propagation/level reset for cross-test pollution defense (1 fix) - test_run_agent: add base_url for OpenRouter detection tests (2 fixes) Deleted: - test_inline_think_blocks_reasoning_only_accepted: tested unimplemented inline handling --- 
tests/gateway/restart_test_helpers.py | 1 + tests/gateway/test_session_race_guard.py | 10 ++++--- .../gateway/test_telegram_photo_interrupts.py | 2 +- .../test_anthropic_error_handling.py | 21 ++++++++++++--- .../run_agent/test_context_token_tracking.py | 1 + tests/run_agent/test_dict_tool_call_args.py | 1 + tests/run_agent/test_provider_parity.py | 15 ++++++----- .../run_agent/test_real_interrupt_subagent.py | 3 ++- tests/run_agent/test_run_agent.py | 23 ++-------------- tests/test_ctx_halving_fix.py | 2 ++ tests/test_hermes_logging.py | 13 +++++++-- tests/test_trajectory_compressor_async.py | 2 +- tests/tools/test_browser_camofox_state.py | 2 +- tests/tools/test_interrupt.py | 11 ++++---- tests/tools/test_voice_cli_integration.py | 1 + tests/tools/test_zombie_process_cleanup.py | 27 ++++++++++++++++--- tools/cronjob_tools.py | 2 +- tools/voice_mode.py | 5 ++++ 18 files changed, 92 insertions(+), 50 deletions(-) diff --git a/tests/gateway/restart_test_helpers.py b/tests/gateway/restart_test_helpers.py index 54dcd69b92..8b48974673 100644 --- a/tests/gateway/restart_test_helpers.py +++ b/tests/gateway/restart_test_helpers.py @@ -35,6 +35,7 @@ def make_restart_source(chat_id: str = "123456", chat_type: str = "dm") -> Sessi platform=Platform.TELEGRAM, chat_id=chat_id, chat_type=chat_type, + user_id="u1", ) diff --git a/tests/gateway/test_session_race_guard.py b/tests/gateway/test_session_race_guard.py index 7a4f6f1011..c9e226b67a 100644 --- a/tests/gateway/test_session_race_guard.py +++ b/tests/gateway/test_session_race_guard.py @@ -60,7 +60,8 @@ def _make_runner(): def _make_event(text="hello", chat_id="12345"): source = SessionSource( - platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm" + platform=Platform.TELEGRAM, chat_id=chat_id, chat_type="dm", + user_id="u1", ) return MessageEvent(text=text, message_type=MessageType.TEXT, source=source) @@ -192,7 +193,8 @@ async def test_command_messages_do_not_leave_sentinel(): _handle_message. 
They must NOT leave a sentinel behind.""" runner = _make_runner() source = SessionSource( - platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm" + platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", + user_id="u1", ) event = MessageEvent( text="/help", message_type=MessageType.TEXT, source=source @@ -268,7 +270,7 @@ async def test_stop_hard_kills_running_agent(): forever — showing 'writing...' but never producing output.""" runner = _make_runner() session_key = build_session_key( - SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm") + SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1") ) # Simulate a running (possibly hung) agent @@ -301,7 +303,7 @@ async def test_stop_clears_pending_messages(): queued during the run must be discarded.""" runner = _make_runner() session_key = build_session_key( - SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm") + SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1") ) fake_agent = MagicMock() diff --git a/tests/gateway/test_telegram_photo_interrupts.py b/tests/gateway/test_telegram_photo_interrupts.py index 9235e539db..e808e68dbe 100644 --- a/tests/gateway/test_telegram_photo_interrupts.py +++ b/tests/gateway/test_telegram_photo_interrupts.py @@ -29,7 +29,7 @@ def _make_runner(): @pytest.mark.asyncio async def test_handle_message_does_not_priority_interrupt_photo_followup(): runner = _make_runner() - source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm") + source = SessionSource(platform=Platform.TELEGRAM, chat_id="12345", chat_type="dm", user_id="u1") session_key = build_session_key(source) running_agent = MagicMock() runner._running_agents[session_key] = running_agent diff --git a/tests/run_agent/test_anthropic_error_handling.py b/tests/run_agent/test_anthropic_error_handling.py index 3d7660aa8d..00055928e0 100644 --- 
a/tests/run_agent/test_anthropic_error_handling.py +++ b/tests/run_agent/test_anthropic_error_handling.py @@ -102,7 +102,19 @@ class _PromptTooLongError(Exception): self.status_code = 400 +class _FakeMessages: + """Stub for client.messages.create() / client.messages.stream().""" + def create(self, **kwargs): + raise NotImplementedError("_FakeAnthropicClient.messages.create should not be called directly in tests") + + def stream(self, **kwargs): + raise NotImplementedError("_FakeAnthropicClient.messages.stream should not be called directly in tests") + + class _FakeAnthropicClient: + def __init__(self): + self.messages = _FakeMessages() + def close(self): pass @@ -131,13 +143,14 @@ def _make_agent_cls(error_cls, recover_after=None): def run_conversation(self, user_message, conversation_history=None, task_id=None): calls = {"n": 0} - def _fake_api_call(api_kwargs): + def _fake_api_call(api_kwargs, **kw): calls["n"] += 1 if recover_after is not None and calls["n"] > recover_after: return _anthropic_response("Recovered") raise error_cls() self._interruptible_api_call = _fake_api_call + self._interruptible_streaming_api_call = _fake_api_call return super().run_conversation( user_message, conversation_history=conversation_history, task_id=task_id ) @@ -352,10 +365,11 @@ def test_401_refresh_fails_is_non_retryable(monkeypatch): return False # Simulate failed credential refresh def run_conversation(self, user_message, conversation_history=None, task_id=None): - def _fake_api_call(api_kwargs): + def _fake_api_call(api_kwargs, **kw): raise _UnauthorizedError() self._interruptible_api_call = _fake_api_call + self._interruptible_streaming_api_call = _fake_api_call return super().run_conversation( user_message, conversation_history=conversation_history, task_id=task_id ) @@ -436,13 +450,14 @@ def test_prompt_too_long_triggers_compression(monkeypatch): def run_conversation(self, user_message, conversation_history=None, task_id=None): calls = {"n": 0} - def 
_fake_api_call(api_kwargs): + def _fake_api_call(api_kwargs, **kw): calls["n"] += 1 if calls["n"] == 1: raise _PromptTooLongError() return _anthropic_response("Compressed and recovered") self._interruptible_api_call = _fake_api_call + self._interruptible_streaming_api_call = _fake_api_call return super().run_conversation( user_message, conversation_history=conversation_history, task_id=task_id ) diff --git a/tests/run_agent/test_context_token_tracking.py b/tests/run_agent/test_context_token_tracking.py index 377a04a5d2..b924448b64 100644 --- a/tests/run_agent/test_context_token_tracking.py +++ b/tests/run_agent/test_context_token_tracking.py @@ -56,6 +56,7 @@ def _make_agent(monkeypatch, api_mode, provider, response_fn): def run_conversation(self, msg, conversation_history=None, task_id=None): self._interruptible_api_call = lambda kw: response_fn() + self._disable_streaming = True return super().run_conversation(msg, conversation_history=conversation_history, task_id=task_id) return _A(model="test-model", api_key="test-key", provider=provider, api_mode=api_mode) diff --git a/tests/run_agent/test_dict_tool_call_args.py b/tests/run_agent/test_dict_tool_call_args.py index e8b4d70fa7..61ee6fc5c2 100644 --- a/tests/run_agent/test_dict_tool_call_args.py +++ b/tests/run_agent/test_dict_tool_call_args.py @@ -66,6 +66,7 @@ def test_tool_call_validation_accepts_dict_arguments(monkeypatch): quiet_mode=True, skip_memory=True, ) + agent._disable_streaming = True result = agent.run_conversation("read the file") diff --git a/tests/run_agent/test_provider_parity.py b/tests/run_agent/test_provider_parity.py index 067ecf6720..c0c62b01bd 100644 --- a/tests/run_agent/test_provider_parity.py +++ b/tests/run_agent/test_provider_parity.py @@ -44,11 +44,11 @@ class _FakeOpenAI: pass -def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="https://openrouter.ai/api/v1"): +def _make_agent(monkeypatch, provider, api_mode="chat_completions", 
base_url="https://openrouter.ai/api/v1", model=None): monkeypatch.setattr("run_agent.get_tool_definitions", lambda **kw: _tool_defs("web_search", "terminal")) monkeypatch.setattr("run_agent.check_toolset_requirements", lambda: {}) monkeypatch.setattr("run_agent.OpenAI", _FakeOpenAI) - return AIAgent( + kwargs = dict( api_key="test-key", base_url=base_url, provider=provider, @@ -58,6 +58,9 @@ def _make_agent(monkeypatch, provider, api_mode="chat_completions", base_url="ht skip_context_files=True, skip_memory=True, ) + if model: + kwargs["model"] = model + return AIAgent(**kwargs) # ── _build_api_kwargs tests ───────────────────────────────────────────────── @@ -247,7 +250,7 @@ class TestBuildApiKwargsChatCompletionsServiceTier: class TestBuildApiKwargsAIGateway: def test_uses_chat_completions_format(self, monkeypatch): - agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o") messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) assert "messages" in kwargs @@ -255,7 +258,7 @@ class TestBuildApiKwargsAIGateway: assert kwargs["messages"][-1]["content"] == "hi" def test_no_responses_api_fields(self, monkeypatch): - agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o") messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) assert "input" not in kwargs @@ -263,7 +266,7 @@ class TestBuildApiKwargsAIGateway: assert "store" not in kwargs def test_includes_reasoning_in_extra_body(self, monkeypatch): - agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o") messages = [{"role": 
"user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) extra = kwargs.get("extra_body", {}) @@ -271,7 +274,7 @@ class TestBuildApiKwargsAIGateway: assert extra["reasoning"]["enabled"] is True def test_includes_tools(self, monkeypatch): - agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1") + agent = _make_agent(monkeypatch, "ai-gateway", base_url="https://ai-gateway.vercel.sh/v1", model="gpt-4o") messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) assert "tools" in kwargs diff --git a/tests/run_agent/test_real_interrupt_subagent.py b/tests/run_agent/test_real_interrupt_subagent.py index e0e681cdf4..39b4c58e2d 100644 --- a/tests/run_agent/test_real_interrupt_subagent.py +++ b/tests/run_agent/test_real_interrupt_subagent.py @@ -76,7 +76,8 @@ class TestRealSubagentInterrupt(unittest.TestCase): parent._delegate_spinner = None parent.tool_progress_callback = None parent.iteration_budget = IterationBudget(max_total=100) - parent._client_kwargs = {"api_key": "test", "base_url": "http://localhost:1"} + parent._client_kwargs = {"api_key": "***", "base_url": "http://localhost:1"} + parent._execution_thread_id = None from tools.delegate_tool import _run_single_child diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index 7d0ddd1c83..568077fd7b 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -880,6 +880,7 @@ class TestBuildApiKwargs: assert kwargs["extra_body"]["reasoning"] == {"enabled": False} def test_reasoning_not_sent_for_unsupported_openrouter_model(self, agent): + agent.base_url = "https://openrouter.ai/api/v1" agent.model = "minimax/minimax-m2.5" messages = [{"role": "user", "content": "hi"}] kwargs = agent._build_api_kwargs(messages) @@ -1575,6 +1576,7 @@ class TestHandleMaxIterations: assert "API down" in result def test_summary_skips_reasoning_for_unsupported_openrouter_model(self, agent): + 
agent.base_url = "https://openrouter.ai/api/v1" agent.model = "minimax/minimax-m2.5" resp = _mock_response(content="Summary") agent.client.chat.completions.create.return_value = resp @@ -1705,27 +1707,6 @@ class TestRunConversation: assert result["completed"] is True assert result["api_calls"] == 2 - def test_inline_think_blocks_reasoning_only_accepted(self, agent): - """Inline reasoning-only responses accepted with (empty) content, no retries.""" - self._setup_agent(agent) - empty_resp = _mock_response( - content="internal reasoning", - finish_reason="stop", - ) - agent.client.chat.completions.create.side_effect = [empty_resp] - with ( - patch.object(agent, "_persist_session"), - patch.object(agent, "_save_trajectory"), - patch.object(agent, "_cleanup_task_resources"), - ): - result = agent.run_conversation("answer me") - assert result["completed"] is True - assert result["final_response"] == "(empty)" - assert result["api_calls"] == 1 # no retries - # Reasoning should be preserved in the assistant message - assistant_msgs = [m for m in result["messages"] if m.get("role") == "assistant"] - assert any(m.get("reasoning") for m in assistant_msgs) - def test_reasoning_only_local_resumed_no_compression_triggered(self, agent): """Reasoning-only responses no longer trigger compression — prefill then accepted.""" self._setup_agent(agent) diff --git a/tests/test_ctx_halving_fix.py b/tests/test_ctx_halving_fix.py index 1ba423c8ff..0dd3ca4e7e 100644 --- a/tests/test_ctx_halving_fix.py +++ b/tests/test_ctx_halving_fix.py @@ -179,6 +179,7 @@ class TestEphemeralMaxOutputTokens: return_value=[{"role": "user", "content": "hi"}] ) agent._anthropic_preserve_dots = MagicMock(return_value=False) + agent.request_overrides = {} return agent def test_ephemeral_override_is_used_on_first_call(self): @@ -253,6 +254,7 @@ class TestContextNotHalvedOnOutputCapError: ) agent._anthropic_preserve_dots = MagicMock(return_value=False) agent._vprint = MagicMock() + agent.request_overrides = {} 
return agent def test_output_cap_error_sets_ephemeral_not_context_length(self): diff --git a/tests/test_hermes_logging.py b/tests/test_hermes_logging.py index 46969d58d6..586a4d6666 100644 --- a/tests/test_hermes_logging.py +++ b/tests/test_hermes_logging.py @@ -298,8 +298,17 @@ class TestGatewayMode: """agent.log (catch-all) still receives gateway AND tool records.""" hermes_logging.setup_logging(hermes_home=hermes_home, mode="gateway") - logging.getLogger("gateway.run").info("gateway msg") - logging.getLogger("tools.file_tools").info("file msg") + gw_logger = logging.getLogger("gateway.run") + file_logger = logging.getLogger("tools.file_tools") + # Ensure propagation and levels are clean (cross-test pollution defense) + gw_logger.propagate = True + file_logger.propagate = True + logging.getLogger("tools").propagate = True + file_logger.setLevel(logging.NOTSET) + logging.getLogger("tools").setLevel(logging.NOTSET) + + gw_logger.info("gateway msg") + file_logger.info("file msg") for h in logging.getLogger().handlers: h.flush() diff --git a/tests/test_trajectory_compressor_async.py b/tests/test_trajectory_compressor_async.py index 2b276d03d0..1c671471d9 100644 --- a/tests/test_trajectory_compressor_async.py +++ b/tests/test_trajectory_compressor_async.py @@ -103,7 +103,7 @@ class TestSourceLineVerification: if "self.async_client = AsyncOpenAI(" in line and "_get_async_client" not in lines[max(0,i-3):i+1]: # Allow it inside _get_async_client method # Check if we're inside _get_async_client by looking at context - context = "\n".join(lines[max(0,i-10):i+1]) + context = "\n".join(lines[max(0,i-20):i+1]) if "_get_async_client" not in context: pytest.fail( f"Line {i}: AsyncOpenAI created eagerly outside _get_async_client()" diff --git a/tests/tools/test_browser_camofox_state.py b/tests/tools/test_browser_camofox_state.py index 33a939f094..475e8c2d02 100644 --- a/tests/tools/test_browser_camofox_state.py +++ b/tests/tools/test_browser_camofox_state.py @@ -64,4 +64,4 @@ 
class TestCamofoxConfigDefaults: # The current schema version is tracked globally; unrelated default # options may bump it after browser defaults are added. - assert DEFAULT_CONFIG["_config_version"] == 15 + assert DEFAULT_CONFIG["_config_version"] == 17 diff --git a/tests/tools/test_interrupt.py b/tests/tools/test_interrupt.py index dc0ab45990..13b5041d67 100644 --- a/tests/tools/test_interrupt.py +++ b/tests/tools/test_interrupt.py @@ -28,7 +28,7 @@ class TestInterruptModule: assert not is_interrupted() def test_thread_safety(self): - """Set from one thread, check from another.""" + """Set from one thread targeting another thread's ident.""" from tools.interrupt import set_interrupt, is_interrupted set_interrupt(False) @@ -45,11 +45,12 @@ class TestInterruptModule: time.sleep(0.05) assert not seen["value"] - set_interrupt(True) + # Target the checker thread's ident so it sees the interrupt + set_interrupt(True, thread_id=t.ident) t.join(timeout=1) assert seen["value"] - set_interrupt(False) + set_interrupt(False, thread_id=t.ident) # --------------------------------------------------------------------------- @@ -189,10 +190,10 @@ class TestSIGKILLEscalation: t.start() time.sleep(0.5) - set_interrupt(True) + set_interrupt(True, thread_id=t.ident) t.join(timeout=5) - set_interrupt(False) + set_interrupt(False, thread_id=t.ident) assert result_holder["value"] is not None assert result_holder["value"]["returncode"] == 130 diff --git a/tests/tools/test_voice_cli_integration.py b/tests/tools/test_voice_cli_integration.py index 39fa026ce6..da500996a1 100644 --- a/tests/tools/test_voice_cli_integration.py +++ b/tests/tools/test_voice_cli_integration.py @@ -32,6 +32,7 @@ def _make_voice_cli(**overrides): cli._voice_tts_done.set() cli._pending_input = queue.Queue() cli._app = None + cli._attached_images = [] cli.console = SimpleNamespace(width=80) for k, v in overrides.items(): setattr(cli, k, v) diff --git a/tests/tools/test_zombie_process_cleanup.py 
b/tests/tools/test_zombie_process_cleanup.py index 9cbbbcd1fd..999bc3fe7e 100644 --- a/tests/tools/test_zombie_process_cleanup.py +++ b/tests/tools/test_zombie_process_cleanup.py @@ -190,17 +190,38 @@ class TestGatewayCleanupWiring: def test_gateway_stop_calls_close(self): """gateway stop() should call close() on all running agents.""" import asyncio - from unittest.mock import MagicMock, patch + import threading + from unittest.mock import AsyncMock, MagicMock, patch - runner = MagicMock() + from gateway.run import GatewayRunner + + runner = object.__new__(GatewayRunner) runner._running = True runner._running_agents = {} + runner._running_agents_ts = {} runner.adapters = {} runner._background_tasks = set() runner._pending_messages = {} runner._pending_approvals = {} + runner._pending_model_notes = {} runner._shutdown_event = asyncio.Event() runner._exit_reason = None + runner._exit_code = None + runner._stop_task = None + runner._draining = False + runner._restart_requested = False + runner._restart_task_started = False + runner._restart_detached = False + runner._restart_via_service = False + runner._restart_drain_timeout = 5.0 + runner._voice_mode = {} + runner._session_model_overrides = {} + runner._update_prompt_pending = {} + runner._busy_input_mode = "interrupt" + runner._agent_cache = {} + runner._agent_cache_lock = threading.Lock() + runner._shutdown_all_gateway_honcho = lambda: None + runner._update_runtime_status = MagicMock() mock_agent_1 = MagicMock() mock_agent_2 = MagicMock() @@ -209,8 +230,6 @@ class TestGatewayCleanupWiring: "session-2": mock_agent_2, } - from gateway.run import GatewayRunner - loop = asyncio.new_event_loop() try: with patch("gateway.status.remove_pid_file"), \ diff --git a/tools/cronjob_tools.py b/tools/cronjob_tools.py index d5c81ad7a8..90ecde65ab 100644 --- a/tools/cronjob_tools.py +++ b/tools/cronjob_tools.py @@ -465,7 +465,7 @@ Important safety rule: cron-run sessions should not recursively schedule more cr }, "deliver": { 
"type": "string", - "description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting." + "description": "Omit this parameter to auto-deliver back to the current chat and topic (recommended). Auto-detection preserves thread/topic context. Only set explicitly when the user asks to deliver somewhere OTHER than the current conversation. Values: 'origin' (same as omitting), 'local' (no delivery, save only), or platform:chat_id:thread_id for a specific destination. Examples: 'telegram:-1001234567890:17585', 'discord:#engineering', 'sms:+15551234567'. WARNING: 'platform:chat_id' without :thread_id loses topic targeting." }, "skills": { "type": "array", diff --git a/tools/voice_mode.py b/tools/voice_mode.py index 2beab4f4f7..5dc99070c2 100644 --- a/tools/voice_mode.py +++ b/tools/voice_mode.py @@ -429,6 +429,11 @@ class AudioRecorder: """Current audio input RMS level (0-32767). 
Updated each audio chunk.""" return self._current_rms + @property + def is_recording(self) -> bool: + """Whether audio recording is currently active.""" + return self._recording + # -- public methods ------------------------------------------------------ def _ensure_stream(self) -> None: From c449cd1af58c00225df29364a2c67c203d8b4582 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A2=A8=E7=B6=A0BG?= Date: Mon, 13 Apr 2026 05:26:32 -0700 Subject: [PATCH 02/69] =?UTF-8?q?fix(config):=20restore=20custom=20provide?= =?UTF-8?q?rs=20after=20v11=E2=86=92v12=20migration?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The v11→v12 migration converts custom_providers (list) into providers (dict), then deletes the list. But all runtime resolvers read from custom_providers — after migration, named custom endpoints silently stop resolving and fallback chains fail with AuthError. Add get_compatible_custom_providers() that reads from both config schemas (legacy custom_providers list + v12+ providers dict), normalizes entries, deduplicates, and returns a unified list. Update ALL consumers: - hermes_cli/runtime_provider.py: _get_named_custom_provider() + key_env - hermes_cli/auth_commands.py: credential pool provider names - hermes_cli/main.py: model picker + _model_flow_named_custom() - agent/auxiliary_client.py: key_env + custom_entry model fallback - agent/credential_pool.py: _iter_custom_providers() - cli.py + gateway/run.py: /model switch custom_providers passthrough - run_agent.py + gateway/run.py: per-model context_length lookup Also: use config.pop() instead of del for safer migration, fix stale _config_version assertions in tests, add pool mock to codex test. 
Co-authored-by: 墨綠BG Closes #8776, salvaged from PR #8814 --- agent/auxiliary_client.py | 8 +- agent/credential_pool.py | 8 + cli.py | 4 +- gateway/run.py | 41 +++-- hermes_cli/auth_commands.py | 18 +-- hermes_cli/config.py | 134 +++++++++++++++- hermes_cli/main.py | 54 +++++-- hermes_cli/runtime_provider.py | 30 ++-- run_agent.py | 41 ++--- tests/hermes_cli/test_config.py | 143 +++++++++++++++++- .../test_runtime_provider_resolution.py | 86 +++++++++++ 11 files changed, 495 insertions(+), 72 deletions(-) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 84f023f83b..c1c2e2f9a3 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1438,10 +1438,14 @@ def resolve_provider_client( custom_entry = _get_named_custom_provider(provider) if custom_entry: custom_base = custom_entry.get("base_url", "").strip() - custom_key = custom_entry.get("api_key", "").strip() or "no-key-required" + custom_key = custom_entry.get("api_key", "").strip() + custom_key_env = custom_entry.get("key_env", "").strip() + if not custom_key and custom_key_env: + custom_key = os.getenv(custom_key_env, "").strip() + custom_key = custom_key or "no-key-required" if custom_base: final_model = _normalize_resolved_model( - model or _read_main_model() or "gpt-4o-mini", + model or custom_entry.get("model") or _read_main_model() or "gpt-4o-mini", provider, ) client = OpenAI(api_key=custom_key, base_url=custom_base) diff --git a/agent/credential_pool.py b/agent/credential_pool.py index e067fb9014..ea9ad92329 100644 --- a/agent/credential_pool.py +++ b/agent/credential_pool.py @@ -289,6 +289,14 @@ def _iter_custom_providers(config: Optional[dict] = None): return custom_providers = config.get("custom_providers") if not isinstance(custom_providers, list): + # Fall back to the v12+ providers dict via the compatibility layer + try: + from hermes_cli.config import get_compatible_custom_providers + + custom_providers = get_compatible_custom_providers(config) + except 
Exception: + return + if not custom_providers: return for entry in custom_providers: if not isinstance(entry, dict): diff --git a/cli.py b/cli.py index a61bcd9d33..dcb5bfcc5f 100644 --- a/cli.py +++ b/cli.py @@ -4710,10 +4710,10 @@ class HermesCLI: user_provs = None custom_provs = None try: - from hermes_cli.config import load_config + from hermes_cli.config import get_compatible_custom_providers, load_config cfg = load_config() user_provs = cfg.get("providers") - custom_provs = cfg.get("custom_providers") + custom_provs = get_compatible_custom_providers(cfg) except Exception: pass diff --git a/gateway/run.py b/gateway/run.py index 4c30db7db8..afc5aa035e 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3330,21 +3330,26 @@ class GatewayRunner: # Must run after runtime resolution so _hyg_base_url is set. if _hyg_config_context_length is None and _hyg_base_url: try: - _hyg_custom_providers = _hyg_data.get("custom_providers") - if isinstance(_hyg_custom_providers, list): - for _cp in _hyg_custom_providers: - if not isinstance(_cp, dict): - continue - _cp_url = (_cp.get("base_url") or "").rstrip("/") - if _cp_url and _cp_url == _hyg_base_url.rstrip("/"): - _cp_models = _cp.get("models", {}) - if isinstance(_cp_models, dict): - _cp_model_cfg = _cp_models.get(_hyg_model, {}) - if isinstance(_cp_model_cfg, dict): - _cp_ctx = _cp_model_cfg.get("context_length") - if _cp_ctx is not None: - _hyg_config_context_length = int(_cp_ctx) - break + try: + from hermes_cli.config import get_compatible_custom_providers as _gw_gcp + _hyg_custom_providers = _gw_gcp(_hyg_data) + except Exception: + _hyg_custom_providers = _hyg_data.get("custom_providers") + if not isinstance(_hyg_custom_providers, list): + _hyg_custom_providers = [] + for _cp in _hyg_custom_providers: + if not isinstance(_cp, dict): + continue + _cp_url = (_cp.get("base_url") or "").rstrip("/") + if _cp_url and _cp_url == _hyg_base_url.rstrip("/"): + _cp_models = _cp.get("models", {}) + if isinstance(_cp_models, 
dict): + _cp_model_cfg = _cp_models.get(_hyg_model, {}) + if isinstance(_cp_model_cfg, dict): + _cp_ctx = _cp_model_cfg.get("context_length") + if _cp_ctx is not None: + _hyg_config_context_length = int(_cp_ctx) + break except (TypeError, ValueError): pass except Exception: @@ -4296,7 +4301,11 @@ class GatewayRunner: current_provider = model_cfg.get("provider", current_provider) current_base_url = model_cfg.get("base_url", "") user_provs = cfg.get("providers") - custom_provs = cfg.get("custom_providers") + try: + from hermes_cli.config import get_compatible_custom_providers + custom_provs = get_compatible_custom_providers(cfg) + except Exception: + custom_provs = cfg.get("custom_providers") except Exception: pass diff --git a/hermes_cli/auth_commands.py b/hermes_cli/auth_commands.py index 0532faa770..c1cf0ff618 100644 --- a/hermes_cli/auth_commands.py +++ b/hermes_cli/auth_commands.py @@ -36,25 +36,23 @@ _OAUTH_CAPABLE_PROVIDERS = {"anthropic", "nous", "openai-codex", "qwen-oauth"} def _get_custom_provider_names() -> list: - """Return list of (display_name, pool_key) tuples for custom_providers in config.""" + """Return list of (display_name, pool_key, provider_key) tuples.""" try: - from hermes_cli.config import load_config + from hermes_cli.config import get_compatible_custom_providers, load_config config = load_config() except Exception: return [] - custom_providers = config.get("custom_providers") - if not isinstance(custom_providers, list): - return [] result = [] - for entry in custom_providers: + for entry in get_compatible_custom_providers(config): if not isinstance(entry, dict): continue name = entry.get("name") if not isinstance(name, str) or not name.strip(): continue pool_key = f"{CUSTOM_POOL_PREFIX}{_normalize_custom_pool_name(name)}" - result.append((name.strip(), pool_key)) + provider_key = str(entry.get("provider_key", "") or "").strip() + result.append((name.strip(), pool_key, provider_key)) return result @@ -66,9 +64,11 @@ def 
_resolve_custom_provider_input(raw: str) -> str | None: # Direct match on 'custom:name' format if normalized.startswith(CUSTOM_POOL_PREFIX): return normalized - for display_name, pool_key in _get_custom_provider_names(): + for display_name, pool_key, provider_key in _get_custom_provider_names(): if _normalize_custom_pool_name(display_name) == normalized: return pool_key + if provider_key and provider_key.strip().lower() == normalized: + return pool_key return None @@ -405,7 +405,7 @@ def _pick_provider(prompt: str = "Provider") -> str: known = sorted(set(list(PROVIDER_REGISTRY.keys()) + ["openrouter"])) custom_names = _get_custom_provider_names() if custom_names: - custom_display = [name for name, _key in custom_names] + custom_display = [name for name, _key, _provider_key in custom_names] print(f"\nKnown providers: {', '.join(known)}") print(f"Custom endpoints: {', '.join(custom_display)}") else: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index ef4e04b716..f524e792a5 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1544,6 +1544,136 @@ def get_missing_skill_config_vars() -> List[Dict[str, Any]]: return missing +def _normalize_custom_provider_entry( + entry: Any, + *, + provider_key: str = "", +) -> Optional[Dict[str, Any]]: + """Return a runtime-compatible custom provider entry or ``None``.""" + if not isinstance(entry, dict): + return None + + base_url = "" + for url_key in ("api", "url", "base_url"): + raw_url = entry.get(url_key) + if isinstance(raw_url, str) and raw_url.strip(): + base_url = raw_url.strip() + break + if not base_url: + return None + + name = "" + raw_name = entry.get("name") + if isinstance(raw_name, str) and raw_name.strip(): + name = raw_name.strip() + elif provider_key.strip(): + name = provider_key.strip() + if not name: + return None + + normalized: Dict[str, Any] = { + "name": name, + "base_url": base_url, + } + + provider_key = provider_key.strip() + if provider_key: + normalized["provider_key"] = 
provider_key + + api_key = entry.get("api_key") + if isinstance(api_key, str) and api_key.strip(): + normalized["api_key"] = api_key.strip() + + key_env = entry.get("key_env") + if isinstance(key_env, str) and key_env.strip(): + normalized["key_env"] = key_env.strip() + + api_mode = entry.get("api_mode") or entry.get("transport") + if isinstance(api_mode, str) and api_mode.strip(): + normalized["api_mode"] = api_mode.strip() + + model_name = entry.get("model") or entry.get("default_model") + if isinstance(model_name, str) and model_name.strip(): + normalized["model"] = model_name.strip() + + models = entry.get("models") + if isinstance(models, dict) and models: + normalized["models"] = models + + context_length = entry.get("context_length") + if isinstance(context_length, int) and context_length > 0: + normalized["context_length"] = context_length + + rate_limit_delay = entry.get("rate_limit_delay") + if isinstance(rate_limit_delay, (int, float)) and rate_limit_delay >= 0: + normalized["rate_limit_delay"] = rate_limit_delay + + return normalized + + +def providers_dict_to_custom_providers(providers_dict: Any) -> List[Dict[str, Any]]: + """Normalize ``providers`` config entries into the legacy custom-provider shape.""" + if not isinstance(providers_dict, dict): + return [] + + custom_providers: List[Dict[str, Any]] = [] + for key, entry in providers_dict.items(): + normalized = _normalize_custom_provider_entry(entry, provider_key=str(key)) + if normalized is not None: + custom_providers.append(normalized) + + return custom_providers + + +def get_compatible_custom_providers( + config: Optional[Dict[str, Any]] = None, +) -> List[Dict[str, Any]]: + """Return a deduplicated custom-provider view across legacy and v12+ config. + + ``custom_providers`` remains the on-disk legacy format, while ``providers`` + is the newer keyed schema. 
Runtime and picker flows still need a single + list-shaped view, but we should not materialise that compatibility layer + back into config.yaml because it duplicates entries in UIs. + """ + if config is None: + config = load_config() + + compatible: List[Dict[str, Any]] = [] + seen_provider_keys: set = set() + seen_name_url_pairs: set = set() + + def _append_if_new(entry: Optional[Dict[str, Any]]) -> None: + if entry is None: + return + provider_key = str(entry.get("provider_key", "") or "").strip().lower() + name = str(entry.get("name", "") or "").strip().lower() + base_url = str(entry.get("base_url", "") or "").strip().rstrip("/").lower() + pair = (name, base_url) + + if provider_key and provider_key in seen_provider_keys: + return + if name and base_url and pair in seen_name_url_pairs: + return + + compatible.append(entry) + if provider_key: + seen_provider_keys.add(provider_key) + if name and base_url: + seen_name_url_pairs.add(pair) + + custom_providers = config.get("custom_providers") + if custom_providers is not None: + if not isinstance(custom_providers, list): + return [] + for entry in custom_providers: + _append_if_new(_normalize_custom_provider_entry(entry)) + + for entry in providers_dict_to_custom_providers(config.get("providers")): + _append_if_new(entry) + + return compatible + + def check_config_version() -> Tuple[int, int]: """ Check config version. 
@@ -1861,8 +1991,8 @@ def migrate_config(interactive: bool = True, quiet: bool = False) -> Dict[str, A if migrated_count > 0: config["providers"] = providers_dict - # Remove the old list - del config["custom_providers"] + # Remove the old list — runtime reads via get_compatible_custom_providers() + config.pop("custom_providers", None) save_config(config) if not quiet: print(f" ✓ Migrated {migrated_count} custom provider(s) to providers: section") diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 24ba11f20f..f653b4cd07 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -999,7 +999,7 @@ def select_provider_and_model(args=None): from hermes_cli.auth import ( resolve_provider, AuthError, format_auth_error, ) - from hermes_cli.config import load_config, get_env_value + from hermes_cli.config import get_compatible_custom_providers, load_config, get_env_value config = load_config() current_model = config.get("model") @@ -1090,11 +1090,8 @@ def select_provider_and_model(args=None): ] def _named_custom_provider_map(cfg) -> dict[str, dict[str, str]]: - custom_providers_cfg = cfg.get("custom_providers") or [] custom_provider_map = {} - if not isinstance(custom_providers_cfg, list): - return custom_provider_map - for entry in custom_providers_cfg: + for entry in get_compatible_custom_providers(cfg): if not isinstance(entry, dict): continue name = (entry.get("name") or "").strip() @@ -1102,12 +1099,20 @@ def select_provider_and_model(args=None): if not name or not base_url: continue key = "custom:" + name.lower().replace(" ", "-") + provider_key = (entry.get("provider_key") or "").strip() + if provider_key: + try: + resolve_provider(provider_key) + except AuthError: + key = provider_key custom_provider_map[key] = { "name": name, "base_url": base_url, "api_key": entry.get("api_key", ""), + "key_env": entry.get("key_env", ""), "model": entry.get("model", ""), "api_mode": entry.get("api_mode", ""), + "provider_key": provider_key, } return custom_provider_map @@ 
-1157,7 +1162,8 @@ def select_provider_and_model(args=None): if selected_provider == "more": ext_ordered = list(extended_providers) ext_ordered.append(("custom", "Custom endpoint (enter URL manually)")) - if _custom_provider_map: + _has_saved_custom_list = isinstance(config.get("custom_providers"), list) and bool(config.get("custom_providers")) + if _has_saved_custom_list: ext_ordered.append(("remove-custom", "Remove a saved custom provider")) ext_ordered.append(("cancel", "Cancel")) @@ -1184,7 +1190,7 @@ def select_provider_and_model(args=None): _model_flow_copilot(config, current_model) elif selected_provider == "custom": _model_flow_custom(config) - elif selected_provider.startswith("custom:"): + elif selected_provider.startswith("custom:") or selected_provider in _custom_provider_map: provider_info = _named_custom_provider_map(load_config()).get(selected_provider) if provider_info is None: print( @@ -1869,7 +1875,9 @@ def _model_flow_named_custom(config, provider_info): name = provider_info["name"] base_url = provider_info["base_url"] api_key = provider_info.get("api_key", "") + key_env = provider_info.get("key_env", "") saved_model = provider_info.get("model", "") + provider_key = (provider_info.get("provider_key") or "").strip() print(f" Provider: {name}") print(f" URL: {base_url}") @@ -1952,10 +1960,15 @@ def _model_flow_named_custom(config, provider_info): if not isinstance(model, dict): model = {"default": model} if model else {} cfg["model"] = model - model["provider"] = "custom" - model["base_url"] = base_url - if api_key: - model["api_key"] = api_key + if provider_key: + model["provider"] = provider_key + model.pop("base_url", None) + model.pop("api_key", None) + else: + model["provider"] = "custom" + model["base_url"] = base_url + if api_key: + model["api_key"] = api_key # Apply api_mode from custom_providers entry, or clear stale value custom_api_mode = provider_info.get("api_mode", "") if custom_api_mode: @@ -1965,8 +1978,23 @@ def 
_model_flow_named_custom(config, provider_info): save_config(cfg) deactivate_provider() - # Save model name to the custom_providers entry for next time - _save_custom_provider(base_url, api_key, model_name) + # Persist the selected model back to whichever schema owns this endpoint. + if provider_key: + cfg = load_config() + providers_cfg = cfg.get("providers") + if isinstance(providers_cfg, dict): + provider_entry = providers_cfg.get(provider_key) + if isinstance(provider_entry, dict): + provider_entry["default_model"] = model_name + if api_key and not str(provider_entry.get("api_key", "") or "").strip(): + provider_entry["api_key"] = api_key + if key_env and not str(provider_entry.get("key_env", "") or "").strip(): + provider_entry["key_env"] = key_env + cfg["providers"] = providers_cfg + save_config(cfg) + else: + # Save model name to the custom_providers entry for next time + _save_custom_provider(base_url, api_key, model_name) print(f"\n✅ Model set to: {model_name}") print(f" Provider: {name} ({base_url})") diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index d8854b893d..6957c80b6e 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -26,7 +26,7 @@ from hermes_cli.auth import ( resolve_external_process_provider_credentials, has_usable_secret, ) -from hermes_cli.config import load_config +from hermes_cli.config import get_compatible_custom_providers, load_config from hermes_constants import OPENROUTER_BASE_URL @@ -315,13 +315,16 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An # Fall back to custom_providers: list (legacy format) custom_providers = config.get("custom_providers") - if not isinstance(custom_providers, list): - if isinstance(custom_providers, dict): - logger.warning( - "custom_providers in config.yaml is a dict, not a list. " - "Each entry must be prefixed with '-' in YAML. " - "Run 'hermes doctor' for details." 
- ) + if isinstance(custom_providers, dict): + logger.warning( + "custom_providers in config.yaml is a dict, not a list. " + "Each entry must be prefixed with '-' in YAML. " + "Run 'hermes doctor' for details." + ) + return None + + custom_providers = get_compatible_custom_providers(config) + if not custom_providers: return None for entry in custom_providers: @@ -333,13 +336,21 @@ def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, An continue name_norm = _normalize_custom_provider_name(name) menu_key = f"custom:{name_norm}" - if requested_norm not in {name_norm, menu_key}: + provider_key = str(entry.get("provider_key", "") or "").strip() + provider_key_norm = _normalize_custom_provider_name(provider_key) if provider_key else "" + provider_menu_key = f"custom:{provider_key_norm}" if provider_key_norm else "" + if requested_norm not in {name_norm, menu_key, provider_key_norm, provider_menu_key}: continue result = { "name": name.strip(), "base_url": base_url.strip(), "api_key": str(entry.get("api_key", "") or "").strip(), } + key_env = str(entry.get("key_env", "") or "").strip() + if key_env: + result["key_env"] = key_env + if provider_key: + result["provider_key"] = provider_key api_mode = _parse_api_mode(entry.get("api_mode")) if api_mode: result["api_mode"] = api_mode @@ -381,6 +392,7 @@ def _resolve_named_custom_runtime( api_key_candidates = [ (explicit_api_key or "").strip(), str(custom_provider.get("api_key", "") or "").strip(), + os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(), os.getenv("OPENAI_API_KEY", "").strip(), os.getenv("OPENROUTER_API_KEY", "").strip(), ] diff --git a/run_agent.py b/run_agent.py index 89526320ec..64daad4c8b 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1267,24 +1267,29 @@ class AIAgent: # Check custom_providers per-model context_length if _config_context_length is None: - _custom_providers = _agent_cfg.get("custom_providers") - if isinstance(_custom_providers, list): - for 
_cp_entry in _custom_providers: - if not isinstance(_cp_entry, dict): - continue - _cp_url = (_cp_entry.get("base_url") or "").rstrip("/") - if _cp_url and _cp_url == self.base_url.rstrip("/"): - _cp_models = _cp_entry.get("models", {}) - if isinstance(_cp_models, dict): - _cp_model_cfg = _cp_models.get(self.model, {}) - if isinstance(_cp_model_cfg, dict): - _cp_ctx = _cp_model_cfg.get("context_length") - if _cp_ctx is not None: - try: - _config_context_length = int(_cp_ctx) - except (TypeError, ValueError): - pass - break + try: + from hermes_cli.config import get_compatible_custom_providers + _custom_providers = get_compatible_custom_providers(_agent_cfg) + except Exception: + _custom_providers = _agent_cfg.get("custom_providers") + if not isinstance(_custom_providers, list): + _custom_providers = [] + for _cp_entry in _custom_providers: + if not isinstance(_cp_entry, dict): + continue + _cp_url = (_cp_entry.get("base_url") or "").rstrip("/") + if _cp_url and _cp_url == self.base_url.rstrip("/"): + _cp_models = _cp_entry.get("models", {}) + if isinstance(_cp_models, dict): + _cp_model_cfg = _cp_models.get(self.model, {}) + if isinstance(_cp_model_cfg, dict): + _cp_ctx = _cp_model_cfg.get("context_length") + if _cp_ctx is not None: + try: + _config_context_length = int(_cp_ctx) + except (TypeError, ValueError): + pass + break # Select context engine: config-driven (like memory providers). # 1. 
Check config.yaml context.engine setting diff --git a/tests/hermes_cli/test_config.py b/tests/hermes_cli/test_config.py index d934a80125..397027d3a9 100644 --- a/tests/hermes_cli/test_config.py +++ b/tests/hermes_cli/test_config.py @@ -10,6 +10,7 @@ from hermes_cli.config import ( DEFAULT_CONFIG, get_hermes_home, ensure_hermes_home, + get_compatible_custom_providers, load_config, load_env, migrate_config, @@ -424,6 +425,146 @@ class TestAnthropicTokenMigration: assert load_env().get("ANTHROPIC_TOKEN") == "current-token" +class TestCustomProviderCompatibility: + """Custom provider compatibility across legacy and v12+ config schemas.""" + + def test_v11_upgrade_moves_custom_providers_into_providers(self, tmp_path): + config_path = tmp_path / "config.yaml" + config_path.write_text( + yaml.safe_dump( + { + "_config_version": 11, + "model": { + "default": "openai/gpt-5.4", + "provider": "openrouter", + }, + "custom_providers": [ + { + "name": "OpenAI Direct", + "base_url": "https://api.openai.com/v1", + "api_key": "test-key", + "api_mode": "codex_responses", + "model": "gpt-5-mini", + } + ], + "fallback_providers": [ + {"provider": "openai-direct", "model": "gpt-5-mini"} + ], + } + ), + encoding="utf-8", + ) + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + migrate_config(interactive=False, quiet=True) + raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) + + assert raw["_config_version"] == 17 + assert raw["providers"]["openai-direct"] == { + "api": "https://api.openai.com/v1", + "api_key": "test-key", + "default_model": "gpt-5-mini", + "name": "OpenAI Direct", + "transport": "codex_responses", + } + # custom_providers removed by migration — runtime reads via compat layer + assert "custom_providers" not in raw + + def test_providers_dict_resolves_at_runtime(self, tmp_path): + """After migration deleted custom_providers, get_compatible_custom_providers + still finds entries from the providers dict.""" + config_path = tmp_path / "config.yaml" + 
config_path.write_text( + yaml.safe_dump( + { + "_config_version": 17, + "providers": { + "openai-direct": { + "api": "https://api.openai.com/v1", + "api_key": "test-key", + "default_model": "gpt-5-mini", + "name": "OpenAI Direct", + "transport": "codex_responses", + } + }, + } + ), + encoding="utf-8", + ) + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + compatible = get_compatible_custom_providers() + + assert len(compatible) == 1 + assert compatible[0]["name"] == "OpenAI Direct" + assert compatible[0]["base_url"] == "https://api.openai.com/v1" + assert compatible[0]["provider_key"] == "openai-direct" + assert compatible[0]["api_mode"] == "codex_responses" + + def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path): + config_path = tmp_path / "config.yaml" + config_path.write_text( + yaml.safe_dump( + { + "_config_version": 17, + "providers": { + "my-provider": { + "name": "My Provider", + "api": "https://api.example.com/v1", + "url": "https://url.example.com/v1", + "base_url": "https://base.example.com/v1", + } + }, + } + ), + encoding="utf-8", + ) + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + compatible = get_compatible_custom_providers() + + assert compatible == [ + { + "name": "My Provider", + "base_url": "https://api.example.com/v1", + "provider_key": "my-provider", + } + ] + + def test_dedup_across_legacy_and_providers(self, tmp_path): + """Same name+url in both schemas should not produce duplicates.""" + config_path = tmp_path / "config.yaml" + config_path.write_text( + yaml.safe_dump( + { + "_config_version": 17, + "custom_providers": [ + { + "name": "OpenAI Direct", + "base_url": "https://api.openai.com/v1", + "api_key": "legacy-key", + } + ], + "providers": { + "openai-direct": { + "api": "https://api.openai.com/v1", + "api_key": "new-key", + "name": "OpenAI Direct", + } + }, + } + ), + encoding="utf-8", + ) + + with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}): + compatible 
= get_compatible_custom_providers() + + assert len(compatible) == 1 + # Legacy entry wins (read first) + assert compatible[0]["api_key"] == "legacy-key" + + class TestInterimAssistantMessageConfig: """Test the explicit gateway interim-message config gate.""" @@ -441,6 +582,6 @@ class TestInterimAssistantMessageConfig: migrate_config(interactive=False, quiet=True) raw = yaml.safe_load(config_path.read_text(encoding="utf-8")) - assert raw["_config_version"] == 16 + assert raw["_config_version"] == 17 assert raw["display"]["tool_progress"] == "off" assert raw["display"]["interim_assistant_messages"] is True diff --git a/tests/hermes_cli/test_runtime_provider_resolution.py b/tests/hermes_cli/test_runtime_provider_resolution.py index 20486a805b..c7510a55b8 100644 --- a/tests/hermes_cli/test_runtime_provider_resolution.py +++ b/tests/hermes_cli/test_runtime_provider_resolution.py @@ -119,6 +119,11 @@ def test_resolve_runtime_provider_falls_back_when_pool_empty(monkeypatch): def test_resolve_runtime_provider_codex(monkeypatch): + monkeypatch.setattr( + rp, + "load_pool", + lambda provider: type("P", (), {"has_credentials": lambda self: False})(), + ) monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openai-codex") monkeypatch.setattr( rp, @@ -567,6 +572,87 @@ def test_named_custom_provider_uses_saved_credentials(monkeypatch): assert resolved["source"] == "custom_provider:Local" +def test_named_custom_provider_uses_providers_dict_when_list_missing(monkeypatch): + """After v11→v12 migration deletes custom_providers, resolution should + still find entries in the providers dict via get_compatible_custom_providers.""" + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.setattr( + rp, + "load_config", + lambda: { + "providers": { + "openai-direct-primary": { + "api": "https://api.openai.com/v1", + "api_key": "dir-key", + "default_model": "gpt-5-mini", + "name": "OpenAI Direct (Primary)", + 
"transport": "codex_responses", + } + } + }, + ) + monkeypatch.setattr( + rp, + "resolve_provider", + lambda *a, **k: (_ for _ in ()).throw( + AssertionError( + "resolve_provider should not be called for named custom providers" + ) + ), + ) + + resolved = rp.resolve_runtime_provider(requested="openai-direct-primary") + + assert resolved["provider"] == "custom" + assert resolved["api_mode"] == "codex_responses" + assert resolved["base_url"] == "https://api.openai.com/v1" + assert resolved["api_key"] == "dir-key" + assert resolved["requested_provider"] == "openai-direct-primary" + assert resolved["source"] == "custom_provider:OpenAI Direct (Primary)" + assert resolved["model"] == "gpt-5-mini" + + +def test_named_custom_provider_uses_key_env_from_providers_dict(monkeypatch): + """providers dict entries with key_env should resolve API key from env var.""" + monkeypatch.delenv("OPENAI_API_KEY", raising=False) + monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) + monkeypatch.setenv("MYCORP_API_KEY", "env-secret") + monkeypatch.setattr( + rp, + "load_config", + lambda: { + "providers": { + "mycorp-proxy": { + "base_url": "https://proxy.example.com/v1", + "default_model": "acme-large", + "key_env": "MYCORP_API_KEY", + "name": "MyCorp Proxy", + } + } + }, + ) + monkeypatch.setattr( + rp, + "resolve_provider", + lambda *a, **k: (_ for _ in ()).throw( + AssertionError( + "resolve_provider should not be called for named custom providers" + ) + ), + ) + + resolved = rp.resolve_runtime_provider(requested="mycorp-proxy") + + assert resolved["provider"] == "custom" + assert resolved["api_mode"] == "chat_completions" + assert resolved["base_url"] == "https://proxy.example.com/v1" + assert resolved["api_key"] == "env-secret" + assert resolved["requested_provider"] == "mycorp-proxy" + assert resolved["source"] == "custom_provider:MyCorp Proxy" + assert resolved["model"] == "acme-large" + + def test_named_custom_provider_falls_back_to_openai_api_key(monkeypatch): 
monkeypatch.setenv("OPENAI_API_KEY", "env-openai-key") monkeypatch.delenv("OPENROUTER_API_KEY", raising=False) From 4ca6668daf2c4083cb1ecee0725543922cddd880 Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:50:59 -0700 Subject: [PATCH 03/69] docs: comprehensive update for recent merged PRs (#9019) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Audit and update documentation across 12 files to match changes from ~50 recently merged PRs. Key updates: Slash commands (slash-commands.md): - Add 5 missing commands: /snapshot, /fast, /image, /debug, /restart - Fix /status incorrectly labeled as messaging-only (available in both) - Add --global flag to /model docs - Add [focus topic] arg to /compress docs CLI commands (cli-commands.md): - Add hermes debug share section with options and examples - Add hermes backup section with --quick and --label flags - Add hermes import section Feature docs: - TTS: document global tts.speed and per-provider speed for Edge/OpenAI - Web dashboard: add docs for 5 missing pages (Sessions, Logs, Analytics, Cron, Skills) and 15+ API endpoints - WhatsApp: add streaming, 4K chunking, and markdown formatting docs - Skills: add GitHub rate-limit/GITHUB_TOKEN troubleshooting tip - Budget: document CLI notification on iteration budget exhaustion Config migration (compression.summary_* → auxiliary.compression.*): - Update configuration.md, environment-variables.md, fallback-providers.md, cli.md, and context-compression-and-caching.md - Replace legacy compression.summary_model/provider/base_url references with auxiliary.compression.model/provider/base_url - Add legacy migration info boxes explaining auto-migration Minor fixes: - wecom-callback.md: clarify 'text only' limitation (input only) - Escape {session_id}/{job_id} in web-dashboard.md headings for MDX --- .../context-compression-and-caching.md | 8 +- website/docs/reference/cli-commands.md | 67 
+++++++++++ .../docs/reference/environment-variables.md | 15 ++- website/docs/reference/slash-commands.md | 22 ++-- website/docs/user-guide/cli.md | 6 +- website/docs/user-guide/configuration.md | 45 ++++--- .../user-guide/features/fallback-providers.md | 28 +++-- website/docs/user-guide/features/skills.md | 4 + website/docs/user-guide/features/tts.md | 5 + .../docs/user-guide/features/web-dashboard.md | 110 +++++++++++++++++- .../user-guide/messaging/wecom-callback.md | 2 +- website/docs/user-guide/messaging/whatsapp.md | 27 +++++ 12 files changed, 299 insertions(+), 40 deletions(-) diff --git a/website/docs/developer-guide/context-compression-and-caching.md b/website/docs/developer-guide/context-compression-and-caching.md index d17f45b95b..c500612f65 100644 --- a/website/docs/developer-guide/context-compression-and-caching.md +++ b/website/docs/developer-guide/context-compression-and-caching.md @@ -84,7 +84,13 @@ compression: threshold: 0.50 # Fraction of context window (default: 0.50 = 50%) target_ratio: 0.20 # How much of threshold to keep as tail (default: 0.20) protect_last_n: 20 # Minimum protected tail messages (default: 20) - summary_model: null # Override model for summaries (default: uses auxiliary) + +# Summarization model/provider configured under auxiliary: +auxiliary: + compression: + model: null # Override model for summaries (default: auto-detect) + provider: auto # Provider: "auto", "openrouter", "nous", "main", etc. + base_url: null # Custom OpenAI-compatible endpoint ``` ### Parameter Details diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 7086f07030..95416d2127 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -44,6 +44,9 @@ hermes [global-options] [subcommand/options] | `hermes webhook` | Manage dynamic webhook subscriptions for event-driven activation. | | `hermes doctor` | Diagnose config and dependency issues. 
| | `hermes dump` | Copy-pasteable setup summary for support/debugging. | +| `hermes debug` | Debug tools — upload logs and system info for support. | +| `hermes backup` | Back up Hermes home directory to a zip file. | +| `hermes import` | Restore a Hermes backup from a zip file. | | `hermes logs` | View, tail, and filter agent/gateway/error log files. | | `hermes config` | Show, edit, migrate, and query configuration files. | | `hermes pairing` | Approve or revoke messaging pairing codes. | @@ -355,6 +358,70 @@ config_overrides: `hermes dump` is specifically designed for sharing. For interactive diagnostics, use `hermes doctor`. For a visual overview, use `hermes status`. ::: +## `hermes debug` + +```bash +hermes debug share [options] +``` + +Upload a debug report (system info + recent logs) to a paste service and get a shareable URL. Useful for quick support requests — includes everything a helper needs to diagnose your issue. + +| Option | Description | +|--------|-------------| +| `--lines ` | Number of log lines to include per log file (default: 200). | +| `--expire ` | Paste expiry in days (default: 7). | +| `--local` | Print the report locally instead of uploading. | + +The report includes system info (OS, Python version, Hermes version), recent agent and gateway logs (512 KB limit per file), and redacted API key status. Keys are always redacted — no secrets are uploaded. + +Paste services tried in order: paste.rs, dpaste.com. + +### Examples + +```bash +hermes debug share # Upload debug report, print URL +hermes debug share --lines 500 # Include more log lines +hermes debug share --expire 30 # Keep paste for 30 days +hermes debug share --local # Print report to terminal (no upload) +``` + +## `hermes backup` + +```bash +hermes backup [options] +``` + +Create a zip archive of your Hermes configuration, skills, sessions, and data. The backup excludes the hermes-agent codebase itself. 
+ +| Option | Description | +|--------|-------------| +| `-o`, `--output ` | Output path for the zip file (default: `~/hermes-backup-.zip`). | +| `-q`, `--quick` | Quick snapshot: only critical state files (config.yaml, state.db, .env, auth, cron jobs). Much faster than a full backup. | +| `-l`, `--label ` | Label for the snapshot (only used with `--quick`). | + +The backup uses SQLite's `backup()` API for safe copying, so it works correctly even when Hermes is running (WAL-mode safe). + +### Examples + +```bash +hermes backup # Full backup to ~/hermes-backup-*.zip +hermes backup -o /tmp/hermes.zip # Full backup to specific path +hermes backup --quick # Quick state-only snapshot +hermes backup --quick --label "pre-upgrade" # Quick snapshot with label +``` + +## `hermes import` + +```bash +hermes import [options] +``` + +Restore a previously created Hermes backup into your Hermes home directory. + +| Option | Description | +|--------|-------------| +| `-f`, `--force` | Overwrite existing files without confirmation. | + ## `hermes logs` ```bash diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index ff832a0361..59092b3343 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -328,17 +328,24 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI ## Context Compression (config.yaml only) -Context compression is configured exclusively through the `compression` section in `config.yaml` — there are no environment variables for it. +Context compression is configured exclusively through `config.yaml` — there are no environment variables for it. Threshold settings live in the `compression:` block, while the summarization model/provider lives under `auxiliary.compression:`. 
```yaml compression: enabled: true threshold: 0.50 - summary_model: "" # empty = use main configured model - summary_provider: auto - summary_base_url: null # Custom OpenAI-compatible endpoint for summaries + +auxiliary: + compression: + model: "" # empty = auto-detect + provider: auto + base_url: null # Custom OpenAI-compatible endpoint for summaries ``` +:::info Legacy migration +Older configs with `compression.summary_model`, `compression.summary_provider`, and `compression.summary_base_url` are automatically migrated to `auxiliary.compression.*` on first load. +::: + ## Auxiliary Task Overrides | Variable | Description | diff --git a/website/docs/reference/slash-commands.md b/website/docs/reference/slash-commands.md index 1d9aebd463..88563ee0f4 100644 --- a/website/docs/reference/slash-commands.md +++ b/website/docs/reference/slash-commands.md @@ -28,8 +28,9 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | `/retry` | Retry the last message (resend to agent) | | `/undo` | Remove the last user/assistant exchange | | `/title` | Set a title for the current session (usage: /title My Session Name) | -| `/compress` | Manually compress conversation context (flush memories + summarize) | +| `/compress [focus topic]` | Manually compress conversation context (flush memories + summarize). Optional focus topic narrows what the summary preserves. | | `/rollback` | List or restore filesystem checkpoints (usage: /rollback [number]) | +| `/snapshot [create\|restore \|prune]` (alias: `/snap`) | Create or restore state snapshots of Hermes config/state. `create [label]` saves a snapshot, `restore ` reverts to it, `prune [N]` removes old snapshots, or list all with no args. | | `/stop` | Kill all running background processes | | `/queue ` (alias: `/q`) | Queue a prompt for the next turn (doesn't interrupt the current agent response). 
**Note:** `/q` is claimed by both `/queue` and `/quit`; the last registration wins, so `/q` resolves to `/quit` in practice. Use `/queue` explicitly. | | `/resume [name]` | Resume a previously-named session | @@ -44,11 +45,12 @@ Type `/` in the CLI to open the autocomplete menu. Built-in commands are case-in | Command | Description | |---------|-------------| | `/config` | Show current configuration | -| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint) | +| `/model [model-name]` | Show or change the current model. Supports: `/model claude-sonnet-4`, `/model provider:model` (switch providers), `/model custom:model` (custom endpoint), `/model custom:name:model` (named custom provider), `/model custom` (auto-detect from endpoint). Use `--global` to persist the change to config.yaml. | | `/provider` | Show available providers and current provider | | `/personality` | Set a predefined personality | | `/verbose` | Cycle tool progress display: off → new → all → verbose. Can be [enabled for messaging](#notes) via config. | | `/reasoning` | Manage reasoning effort and display (usage: /reasoning [level\|show\|hide]) | +| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. Options: `normal`, `fast`, `status`, `on`, `off`. | | `/skin` | Show or change the display skin/theme | | `/voice [on\|off\|tts\|status]` | Toggle CLI voice mode and spoken playback. Recording uses `voice.record_key` (default: `Ctrl+B`). | | `/yolo` | Toggle YOLO mode — skip all dangerous command approval prompts. | @@ -75,6 +77,8 @@ Type `/` in the CLI to open the autocomplete menu. 
Built-in commands are case-in | `/insights` | Show usage insights and analytics (last 30 days) | | `/platforms` (alias: `/gateway`) | Show gateway/messaging platform status | | `/paste` | Check clipboard for an image and attach it | +| `/image <path>` | Attach a local image file for your next prompt. | +| `/debug` | Upload debug report (system info + logs) and get shareable links. Also available in messaging. | | `/profile` | Show active profile name and home directory | ### Exit @@ -117,13 +121,14 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/reset` | Reset conversation history. | | `/status` | Show session info. | | `/stop` | Kill all running background processes and interrupt the running agent. | -| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). | +| `/model [provider:model]` | Show or change the model. Supports provider switches (`/model zai:glm-5`), custom endpoints (`/model custom:model`), named custom providers (`/model custom:local:qwen`), and auto-detect (`/model custom`). Use `--global` to persist the change to config.yaml. | | `/provider` | Show provider availability and auth status. | | `/personality [name]` | Set a personality overlay for the session. | +| `/fast [normal\|fast\|status]` | Toggle fast mode — OpenAI Priority Processing / Anthropic Fast Mode. | | `/retry` | Retry the last message. | | `/undo` | Remove the last exchange. | | `/sethome` (alias: `/set-home`) | Mark the current chat as the platform home channel for deliveries. | -| `/compress` | Manually compress conversation context. | +| `/compress [focus topic]` | Manually compress conversation context. Optional focus topic narrows what the summary preserves. | | `/title [name]` | Set or show the session title. | | `/resume [name]` | Resume a previously named session. 
| | `/usage` | Show token usage, estimated cost breakdown (input/output), context window state, and session duration. | @@ -131,6 +136,7 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/reasoning [level\|show\|hide]` | Change reasoning effort or toggle reasoning display. | | `/voice [on\|off\|tts\|join\|channel\|leave\|status]` | Control spoken replies in chat. `join`/`channel`/`leave` manage Discord voice-channel mode. | | `/rollback [number]` | List or restore filesystem checkpoints. | +| `/snapshot [create\|restore <id>\|prune]` (alias: `/snap`) | Create or restore state snapshots of Hermes config/state. | | `/background <prompt>` | Run a prompt in a separate background session. Results are delivered back to the same chat when the task finishes. See [Messaging Background Sessions](/docs/user-guide/messaging/#background-sessions). | | `/plan [request]` | Load the bundled `plan` skill to write a markdown plan instead of executing the work. Plans are saved under `.hermes/plans/` relative to the active workspace/backend working directory. | | `/reload-mcp` (alias: `/reload_mcp`) | Reload MCP servers from config. | @@ -140,13 +146,15 @@ The messaging gateway supports the following built-in commands inside Telegram, | `/approve [session\|always]` | Approve and execute a pending dangerous command. `session` approves for this session only; `always` adds to permanent allowlist. | | `/deny` | Reject a pending dangerous command. | | `/update` | Update Hermes Agent to the latest version. | +| `/restart` | Gracefully restart the gateway after draining active runs. When the gateway comes back online, it sends a confirmation to the requester's chat/thread. | +| `/debug` | Upload debug report (system info + logs) and get shareable links. | | `/help` | Show messaging help. | | `/<skill-name>` | Invoke any installed skill by name. 
| ## Notes -- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/cron`, `/skills`, `/platforms`, `/paste`, `/statusbar`, and `/plugins` are **CLI-only** commands. +- `/skin`, `/tools`, `/toolsets`, `/browser`, `/config`, `/cron`, `/skills`, `/platforms`, `/paste`, `/image`, `/statusbar`, and `/plugins` are **CLI-only** commands. - `/verbose` is **CLI-only by default**, but can be enabled for messaging platforms by setting `display.tool_progress_command: true` in `config.yaml`. When enabled, it cycles the `display.tool_progress` mode and saves to config. -- `/status`, `/sethome`, `/update`, `/approve`, `/deny`, and `/commands` are **messaging-only** commands. -- `/background`, `/voice`, `/reload-mcp`, `/rollback`, and `/yolo` work in **both** the CLI and the messaging gateway. +- `/sethome`, `/update`, `/restart`, `/approve`, `/deny`, and `/commands` are **messaging-only** commands. +- `/status`, `/background`, `/voice`, `/reload-mcp`, `/rollback`, `/snapshot`, `/debug`, `/fast`, and `/yolo` work in **both** the CLI and the messaging gateway. - `/voice join`, `/voice channel`, and `/voice leave` are only meaningful on Discord. diff --git a/website/docs/user-guide/cli.md b/website/docs/user-guide/cli.md index e37b1ddba7..43d12611f9 100644 --- a/website/docs/user-guide/cli.md +++ b/website/docs/user-guide/cli.md @@ -322,7 +322,11 @@ Long conversations are automatically summarized when approaching context limits: compression: enabled: true threshold: 0.50 # Compress at 50% of context limit by default - summary_model: "google/gemini-3-flash-preview" # Model used for summarization + +# Summarization model configured under auxiliary: +auxiliary: + compression: + model: "google/gemini-3-flash-preview" # Model used for summarization ``` When compression triggers, middle turns are summarized while the first 3 and last 4 turns are always preserved. 
diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index 9f7c9e2dd4..b62d313e1f 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -441,11 +441,19 @@ compression: threshold: 0.50 # Compress at this % of context limit target_ratio: 0.20 # Fraction of threshold to preserve as recent tail protect_last_n: 20 # Min recent messages to keep uncompressed - summary_model: "google/gemini-3-flash-preview" # Model for summarization - summary_provider: "auto" # Provider: "auto", "openrouter", "nous", "codex", "main", etc. - summary_base_url: null # Custom OpenAI-compatible endpoint (overrides provider) + +# The summarization model/provider is configured under auxiliary: +auxiliary: + compression: + model: "google/gemini-3-flash-preview" # Model for summarization + provider: "auto" # Provider: "auto", "openrouter", "nous", "codex", "main", etc. + base_url: null # Custom OpenAI-compatible endpoint (overrides provider) ``` +:::info Legacy config migration +Older configs with `compression.summary_model`, `compression.summary_provider`, and `compression.summary_base_url` are automatically migrated to `auxiliary.compression.*` on first load (config version 17). No manual action needed. +::: + ### Common setups **Default (auto-detect) — no configuration needed:** @@ -458,30 +466,32 @@ Uses the first available provider (OpenRouter → Nous → Codex) with Gemini Fl **Force a specific provider** (OAuth or API-key based): ```yaml -compression: - summary_provider: nous - summary_model: gemini-3-flash +auxiliary: + compression: + provider: nous + model: gemini-3-flash ``` Works with any provider: `nous`, `openrouter`, `codex`, `anthropic`, `main`, etc. 
**Custom endpoint** (self-hosted, Ollama, zai, DeepSeek, etc.): ```yaml -compression: - summary_model: glm-4.7 - summary_base_url: https://api.z.ai/api/coding/paas/v4 +auxiliary: + compression: + model: glm-4.7 + base_url: https://api.z.ai/api/coding/paas/v4 ``` Points at a custom OpenAI-compatible endpoint. Uses `OPENAI_API_KEY` for auth. ### How the three knobs interact -| `summary_provider` | `summary_base_url` | Result | +| `auxiliary.compression.provider` | `auxiliary.compression.base_url` | Result | |---------------------|---------------------|--------| | `auto` (default) | not set | Auto-detect best available provider | | `nous` / `openrouter` / etc. | not set | Force that provider, use its auth | | any | set | Use the custom endpoint directly (provider ignored) | :::warning Summary model context length requirement -The `summary_model` **must** have a context window at least as large as your main agent model's. The compressor sends the full middle section of the conversation to the summary model — if that model's context window is smaller than the main model's, the summarization call will fail with a context length error. When this happens, the middle turns are **dropped without a summary**, losing conversation context silently. If you override `summary_model`, verify its context length meets or exceeds your main model's. +The summary model **must** have a context window at least as large as your main agent model's. The compressor sends the full middle section of the conversation to the summary model — if that model's context window is smaller than the main model's, the summarization call will fail with a context length error. When this happens, the middle turns are **dropped without a summary**, losing conversation context silently. If you override the model, verify its context length meets or exceeds your main model's. ::: ## Context Engine @@ -522,6 +532,8 @@ agent: Budget pressure is enabled by default. 
The agent sees warnings naturally as part of tool results, encouraging it to consolidate its work and deliver a response before running out of iterations. +When the iteration budget is fully exhausted, the CLI shows a notification to the user: `⚠ Iteration budget reached (90/90) — response may be incomplete`. If the budget runs out during active work, the agent generates a summary of what was accomplished before stopping. + ### Streaming Timeouts The LLM streaming connection has two timeout layers. Both auto-adjust for local providers (localhost, LAN IPs) — no configuration needed for most setups. @@ -666,7 +678,7 @@ Each auxiliary task has a configurable `timeout` (in seconds). Defaults: vision ::: :::info -Context compression has its own top-level `compression:` block with `summary_provider`, `summary_model`, and `summary_base_url` — see [Context Compression](#context-compression) above. The fallback model uses a `fallback_model:` block — see [Fallback Model](/docs/integrations/providers#fallback-model). All three follow the same provider/model/base_url pattern. +Context compression has its own `compression:` block for thresholds and an `auxiliary.compression:` block for model/provider settings — see [Context Compression](#context-compression) above. The fallback model uses a `fallback_model:` block — see [Fallback Model](/docs/integrations/providers#fallback-model). All three follow the same provider/model/base_url pattern. ::: ### Changing the Vision Model @@ -839,16 +851,21 @@ agent: ```yaml tts: - provider: "edge" # "edge" | "elevenlabs" | "openai" | "neutts" + provider: "edge" # "edge" | "elevenlabs" | "openai" | "neutts" | "minimax" + speed: 1.0 # Global speed multiplier (fallback for all providers) edge: voice: "en-US-AriaNeural" # 322 voices, 74 languages + speed: 1.0 # Speed multiplier (converted to rate percentage, e.g. 
1.5 → +50%) elevenlabs: voice_id: "pNInz6obpgDQGcFmaJgB" model_id: "eleven_multilingual_v2" openai: model: "gpt-4o-mini-tts" voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer + speed: 1.0 # Speed multiplier (clamped to 0.25–4.0 by the API) base_url: "https://api.openai.com/v1" # Override for OpenAI-compatible TTS endpoints + minimax: + speed: 1.0 # Speech speed multiplier neutts: ref_audio: '' ref_text: '' @@ -858,6 +875,8 @@ tts: This controls both the `text_to_speech` tool and spoken replies in voice mode (`/voice tts` in the CLI or messaging gateway). +**Speed fallback hierarchy:** provider-specific speed (e.g. `tts.edge.speed`) → global `tts.speed` → `1.0` default. Set the global `tts.speed` to apply a uniform speed across all providers, or override per-provider for fine-grained control. + ## Display Settings ```yaml diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index b539cb1279..0caa2f0ecd 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -156,7 +156,7 @@ Hermes uses separate lightweight models for side tasks. Each task has its own pr |------|-------------|-----------| | Vision | Image analysis, browser screenshots | `auxiliary.vision` | | Web Extract | Web page summarization | `auxiliary.web_extract` | -| Compression | Context compression summaries | `auxiliary.compression` or `compression.summary_provider` | +| Compression | Context compression summaries | `auxiliary.compression` | | Session Search | Past session summarization | `auxiliary.session_search` | | Skills Hub | Skill search and discovery | `auxiliary.skills_hub` | | MCP | MCP helper operations | `auxiliary.mcp` | @@ -219,13 +219,14 @@ auxiliary: model: "" ``` -Every task above follows the same **provider / model / base_url** pattern. 
Context compression uses its own top-level block: +Every task above follows the same **provider / model / base_url** pattern. Context compression is configured under `auxiliary.compression`: ```yaml -compression: - summary_provider: main # Same provider options as auxiliary tasks - summary_model: google/gemini-3-flash-preview - summary_base_url: null # Custom OpenAI-compatible endpoint +auxiliary: + compression: + provider: main # Same provider options as other auxiliary tasks + model: google/gemini-3-flash-preview + base_url: null # Custom OpenAI-compatible endpoint ``` And the fallback model uses: @@ -270,15 +271,18 @@ auxiliary: ## Context Compression Fallback -Context compression has a legacy configuration path in addition to the auxiliary system: +Context compression uses the `auxiliary.compression` config block to control which model and provider handles summarization: ```yaml -compression: - summary_provider: "auto" # auto | openrouter | nous | main - summary_model: "google/gemini-3-flash-preview" +auxiliary: + compression: + provider: "auto" # auto | openrouter | nous | main + model: "google/gemini-3-flash-preview" ``` -This is equivalent to configuring `auxiliary.compression.provider` and `auxiliary.compression.model`. If both are set, the `auxiliary.compression` values take precedence. +:::info Legacy migration +Older configs with `compression.summary_model` / `compression.summary_provider` / `compression.summary_base_url` are automatically migrated to `auxiliary.compression.*` on first load (config version 17). +::: If no provider is available for compression, Hermes drops middle conversation turns without generating a summary rather than failing the session. 
@@ -325,7 +329,7 @@ See [Scheduled Tasks (Cron)](/docs/user-guide/features/cron) for full configurat | Main agent model | `fallback_model` in config.yaml — one-shot failover on errors | `fallback_model:` (top-level) | | Vision | Auto-detection chain + internal OpenRouter retry | `auxiliary.vision` | | Web extraction | Auto-detection chain + internal OpenRouter retry | `auxiliary.web_extract` | -| Context compression | Auto-detection chain, degrades to no-summary if unavailable | `auxiliary.compression` or `compression.summary_provider` | +| Context compression | Auto-detection chain, degrades to no-summary if unavailable | `auxiliary.compression` | | Session search | Auto-detection chain | `auxiliary.session_search` | | Skills hub | Auto-detection chain | `auxiliary.skills_hub` | | MCP helpers | Auto-detection chain | `auxiliary.mcp` | diff --git a/website/docs/user-guide/features/skills.md b/website/docs/user-guide/features/skills.md index 69663a26e1..aead5ac268 100644 --- a/website/docs/user-guide/features/skills.md +++ b/website/docs/user-guide/features/skills.md @@ -426,6 +426,10 @@ hermes skills update react # Update one specific installed hub skill This uses the stored source identifier plus the current upstream bundle content hash to detect drift. +:::tip GitHub rate limits +Skills hub operations use the GitHub API, which has a rate limit of 60 requests/hour for unauthenticated users. If you see rate-limit errors during install or search, set `GITHUB_TOKEN` in your `.env` file to increase the limit to 5,000 requests/hour. The error message includes an actionable hint when this happens. 
+::: + ### Slash commands (inside chat) All the same commands work with `/skills`: diff --git a/website/docs/user-guide/features/tts.md b/website/docs/user-guide/features/tts.md index 656a41fd83..625e25ad9e 100644 --- a/website/docs/user-guide/features/tts.md +++ b/website/docs/user-guide/features/tts.md @@ -36,8 +36,10 @@ Convert text to speech with six providers: # In ~/.hermes/config.yaml tts: provider: "edge" # "edge" | "elevenlabs" | "openai" | "minimax" | "mistral" | "neutts" + speed: 1.0 # Global speed multiplier (provider-specific settings override this) edge: voice: "en-US-AriaNeural" # 322 voices, 74 languages + speed: 1.0 # Converted to rate percentage (+/-%) elevenlabs: voice_id: "pNInz6obpgDQGcFmaJgB" # Adam model_id: "eleven_multilingual_v2" @@ -45,6 +47,7 @@ tts: model: "gpt-4o-mini-tts" voice: "alloy" # alloy, echo, fable, onyx, nova, shimmer base_url: "https://api.openai.com/v1" # Override for OpenAI-compatible TTS endpoints + speed: 1.0 # 0.25 - 4.0 minimax: model: "speech-2.8-hd" # speech-2.8-hd (default), speech-2.8-turbo voice_id: "English_Graceful_Lady" # See https://platform.minimax.io/faq/system-voice-id @@ -61,6 +64,8 @@ tts: device: cpu ``` +**Speed control**: The global `tts.speed` value applies to all providers by default. Each provider can override it with its own `speed` setting (e.g., `tts.openai.speed: 1.5`). Provider-specific speed takes precedence over the global value. Default is `1.0` (normal speed). 
+ ### Telegram Voice Bubbles & ffmpeg Telegram voice bubbles require Opus/OGG audio format: diff --git a/website/docs/user-guide/features/web-dashboard.md b/website/docs/user-guide/features/web-dashboard.md index 7c01070307..500e48e353 100644 --- a/website/docs/user-guide/features/web-dashboard.md +++ b/website/docs/user-guide/features/web-dashboard.md @@ -1,7 +1,7 @@ --- sidebar_position: 15 title: "Web Dashboard" -description: "Browser-based dashboard for managing configuration, API keys, and monitoring sessions" +description: "Browser-based dashboard for managing configuration, API keys, sessions, logs, analytics, cron jobs, and skills" --- # Web Dashboard @@ -104,6 +104,54 @@ Each key shows: Advanced/rarely-used keys are hidden by default behind a toggle. +### Sessions + +Browse and inspect all agent sessions. Each row shows the session title, source platform icon (CLI, Telegram, Discord, Slack, cron), model name, message count, tool call count, and how long ago it was active. Live sessions are marked with a pulsing badge. + +- **Search** — full-text search across all message content using FTS5. Results show highlighted snippets and auto-scroll to the first matching message when expanded. +- **Expand** — click a session to load its full message history. Messages are color-coded by role (user, assistant, system, tool) and rendered as Markdown with syntax highlighting. +- **Tool calls** — assistant messages with tool calls show collapsible blocks with the function name and JSON arguments. +- **Delete** — remove a session and its message history with the trash icon. + +### Logs + +View agent, gateway, and error log files with filtering and live tailing. 
+ +- **File** — switch between `agent`, `errors`, and `gateway` log files +- **Level** — filter by log level: ALL, DEBUG, INFO, WARNING, or ERROR +- **Component** — filter by source component: all, gateway, agent, tools, cli, or cron +- **Lines** — choose how many lines to display (50, 100, 200, or 500) +- **Auto-refresh** — toggle live tailing that polls for new log lines every 5 seconds +- **Color-coded** — log lines are colored by severity (red for errors, yellow for warnings, dim for debug) + +### Analytics + +Usage and cost analytics computed from session history. Select a time period (7, 30, or 90 days) to see: + +- **Summary cards** — total tokens (input/output), cache hit percentage, total estimated or actual cost, and total session count with daily average +- **Daily token chart** — stacked bar chart showing input and output token usage per day, with hover tooltips showing breakdowns and cost +- **Daily breakdown table** — date, session count, input tokens, output tokens, cache hit rate, and cost for each day +- **Per-model breakdown** — table showing each model used, its session count, token usage, and estimated cost + +### Cron + +Create and manage scheduled cron jobs that run agent prompts on a recurring schedule. + +- **Create** — fill in a name (optional), prompt, cron expression (e.g. `0 9 * * *`), and delivery target (local, Telegram, Discord, Slack, or email) +- **Job list** — each job shows its name, prompt preview, schedule expression, state badge (enabled/paused/error), delivery target, last run time, and next run time +- **Pause / Resume** — toggle a job between active and paused states +- **Trigger now** — immediately execute a job outside its normal schedule +- **Delete** — permanently remove a cron job + +### Skills + +Browse, search, and toggle skills and toolsets. Skills are loaded from `~/.hermes/skills/` and grouped by category. 
+ +- **Search** — filter skills and toolsets by name, description, or category +- **Category filter** — click category pills to narrow the list (e.g. MLOps, MCP, Red Teaming, AI) +- **Toggle** — enable or disable individual skills with a switch. Changes take effect on the next session. +- **Toolsets** — a separate section shows built-in toolsets (file operations, web browsing, etc.) with their active/inactive status, setup requirements, and list of included tools + :::warning Security The web dashboard reads and writes your `.env` file, which contains API keys and secrets. It binds to `127.0.0.1` by default — only accessible from your local machine. If you bind to `0.0.0.0`, anyone on your network can view and modify your credentials. The dashboard has no authentication of its own. ::: @@ -159,6 +207,66 @@ Sets an environment variable. Body: `{"key": "VAR_NAME", "value": "secret"}`. Removes an environment variable. Body: `{"key": "VAR_NAME"}`. +### GET /api/sessions/\{session_id\} + +Returns metadata for a single session. + +### GET /api/sessions/\{session_id\}/messages + +Returns the full message history for a session, including tool calls and timestamps. + +### GET /api/sessions/search + +Full-text search across message content. Query parameter: `q`. Returns matching session IDs with highlighted snippets. + +### DELETE /api/sessions/\{session_id\} + +Deletes a session and its message history. + +### GET /api/logs + +Returns log lines. Query parameters: `file` (agent/errors/gateway), `lines` (count), `level`, `component`. + +### GET /api/analytics/usage + +Returns token usage, cost, and session analytics. Query parameter: `days` (default 30). Response includes daily breakdowns and per-model aggregates. + +### GET /api/cron/jobs + +Returns all configured cron jobs with their state, schedule, and run history. + +### POST /api/cron/jobs + +Creates a new cron job. Body: `{"prompt": "...", "schedule": "0 9 * * *", "name": "...", "deliver": "local"}`. 
+ +### POST /api/cron/jobs/\{job_id\}/pause + +Pauses a cron job. + +### POST /api/cron/jobs/\{job_id\}/resume + +Resumes a paused cron job. + +### POST /api/cron/jobs/\{job_id\}/trigger + +Immediately triggers a cron job outside its schedule. + +### DELETE /api/cron/jobs/\{job_id\} + +Deletes a cron job. + +### GET /api/skills + +Returns all skills with their name, description, category, and enabled status. + +### PUT /api/skills/toggle + +Enables or disables a skill. Body: `{"name": "skill-name", "enabled": true}`. + +### GET /api/tools/toolsets + +Returns all toolsets with their label, description, tools list, and active/configured status. + ## CORS The web server restricts CORS to localhost origins only: diff --git a/website/docs/user-guide/messaging/wecom-callback.md b/website/docs/user-guide/messaging/wecom-callback.md index 4662942769..dd8331fb7f 100644 --- a/website/docs/user-guide/messaging/wecom-callback.md +++ b/website/docs/user-guide/messaging/wecom-callback.md @@ -143,5 +143,5 @@ The crypto implementation is compatible with Tencent's official WXBizMsgCrypt SD - **No streaming** — replies arrive as complete messages after the agent finishes - **No typing indicators** — the callback model doesn't support typing status -- **Text only** — currently supports text messages; image/file/voice not yet implemented +- **Text only** — currently supports text messages for input; image/file/voice input not yet implemented. The agent is aware of outbound media capabilities via the WeCom platform hint (images, documents, video, voice). 
- **Response latency** — agent sessions take 3–30 minutes; users see the reply when processing completes diff --git a/website/docs/user-guide/messaging/whatsapp.md b/website/docs/user-guide/messaging/whatsapp.md index ac6c07b774..e4a8def077 100644 --- a/website/docs/user-guide/messaging/whatsapp.md +++ b/website/docs/user-guide/messaging/whatsapp.md @@ -174,6 +174,33 @@ whatsapp: --- +## Message Formatting & Delivery + +WhatsApp supports **streaming (progressive) responses** — the bot edits its message in real-time as the AI generates text, just like Discord and Telegram. Internally, WhatsApp is classified as a TIER_MEDIUM platform for delivery capabilities. + +### Chunking + +Long responses are automatically split into multiple messages at **4,096 characters** per chunk (WhatsApp's practical display limit). You don't need to configure anything — the gateway handles splitting and sends chunks sequentially. + +### WhatsApp-Compatible Markdown + +Standard Markdown in AI responses is automatically converted to WhatsApp's native formatting: + +| Markdown | WhatsApp | Renders as | +|----------|----------|------------| +| `**bold**` | `*bold*` | **bold** | +| `~~strikethrough~~` | `~strikethrough~` | ~~strikethrough~~ | +| `# Heading` | `*Heading*` | Bold text (no native headings) | +| `[link text](url)` | `link text (url)` | Inline URL | + +Code blocks and inline code are preserved as-is since WhatsApp supports triple-backtick formatting natively. + +### Tool Progress + +When the agent calls tools (web search, file operations, etc.), WhatsApp displays real-time progress indicators showing which tool is running. This is enabled by default — no configuration needed. 
+ +--- + ## Troubleshooting | Problem | Solution | From ba50fa30352cbd74dbb6c13263c94e1a6bb4511c Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Mon, 13 Apr 2026 10:53:10 -0700 Subject: [PATCH 04/69] docs: fix 30+ inaccuracies across documentation (#9023) Cross-referenced all docs pages against the actual codebase and fixed: Reference docs (cli-commands.md, slash-commands.md, profile-commands.md): - Fix: hermes web -> hermes dashboard (correct subparser name) - Fix: Wrong provider list (removed deepseek, ai-gateway, opencode-zen, opencode-go, alibaba; added gemini) - Fix: Missing tts in hermes setup section choices - Add: Missing --image flag for hermes chat - Add: Missing --component flag for hermes logs - Add: Missing CLI commands: debug, backup, import - Fix: /status incorrectly marked as messaging-only (available everywhere) - Fix: /statusbar moved from Session to Configuration category - Add: Missing slash commands: /fast, /snapshot, /image, /debug - Add: Missing /restart from messaging commands table - Fix: /compress description to match COMMAND_REGISTRY - Add: --no-alias flag to profile create docs Configuration docs (configuration.md, environment-variables.md): - Fix: Vision timeout default 30s -> 120s - Fix: TTS providers missing minimax and mistral - Fix: STT providers missing mistral - Fix: TTS openai base_url shown with wrong default - Fix: Compression config showing stale summary_model/provider/base_url keys (migrated out in config v17) -> target_ratio/protect_last_n Getting-started docs: - Fix: Redundant faster-whisper install (already in voice extra) - Fix: Messaging extra description missing Slack Developer guide: - Fix: architecture.md tool count 48 -> 47, toolset count 40 -> 19 - Fix: run_agent.py line count 9,200 -> 10,700 - Fix: cli.py line count 8,500 -> 10,000 - Fix: main.py line count 5,500 -> 6,000 - Fix: gateway/run.py line count 7,500 -> 9,000 - Fix: Browser tools count 11 -> 10 - Fix: Platform 
adapter count 15 -> 18 (add wecom_callback, api_server) - Fix: agent-loop.md wrong budget sharing (not shared, independent) - Fix: agent-loop.md non-existent _get_budget_warning() reference - Fix: context-compression-and-caching.md non-existent function name - Fix: toolsets-reference.md safe toolset includes mixture_of_agents (it doesn't) - Fix: toolsets-reference.md hermes-cli tool count 38 -> 36 Guides: - Fix: automate-with-cron.md claims daily at 9am is valid (it's not) - Fix: delegation-patterns.md Max 3 presented as hard cap (configurable) - Fix: sessions.md group thread key format (shared by default, not per-user) - Fix: cron-internals.md job ID format and JSON structure --- website/docs/developer-guide/agent-loop.md | 9 +++----- website/docs/developer-guide/architecture.md | 23 ++++++++++--------- .../context-compression-and-caching.md | 2 +- .../docs/developer-guide/cron-internals.md | 19 +++++++++++---- .../docs/developer-guide/gateway-internals.md | 2 +- website/docs/getting-started/installation.md | 2 +- website/docs/getting-started/quickstart.md | 4 +--- website/docs/guides/automate-with-cron.md | 2 +- website/docs/guides/delegation-patterns.md | 2 +- website/docs/reference/cli-commands.md | 19 +++++++++------ .../docs/reference/environment-variables.md | 8 ++----- website/docs/reference/profile-commands.md | 1 + website/docs/reference/slash-commands.md | 7 +++++- website/docs/reference/toolsets-reference.md | 6 ++--- website/docs/user-guide/configuration.md | 9 ++++---- website/docs/user-guide/sessions.md | 2 +- 16 files changed, 65 insertions(+), 52 deletions(-) diff --git a/website/docs/developer-guide/agent-loop.md b/website/docs/developer-guide/agent-loop.md index b07fa04789..2d0df3278d 100644 --- a/website/docs/developer-guide/agent-loop.md +++ b/website/docs/developer-guide/agent-loop.md @@ -6,7 +6,7 @@ description: "Detailed walkthrough of AIAgent execution, API modes, tools, callb # Agent Loop Internals -The core orchestration engine is 
`run_agent.py`'s `AIAgent` class — roughly 9,200 lines that handle everything from prompt assembly to tool dispatch to provider failover. +The core orchestration engine is `run_agent.py`'s `AIAgent` class — roughly 10,700 lines that handle everything from prompt assembly to tool dispatch to provider failover. ## Core Responsibilities @@ -181,10 +181,7 @@ These tools modify agent state directly and return synthetic tool results withou The agent tracks iterations via `IterationBudget`: - Default: 90 iterations (configurable via `agent.max_turns`) -- Shared across parent and child agents — a subagent consumes from the parent's budget -- Two-tier budget pressure via `_get_budget_warning()`: - - At 70%+ usage (caution tier): appends `[BUDGET: Iteration X/Y. N iterations left. Start consolidating your work.]` to the last tool result - - At 90%+ usage (warning tier): appends `[BUDGET WARNING: Iteration X/Y. Only N iteration(s) left. Provide your final response NOW.]` +- Each agent gets its own budget. 
Subagents get independent budgets capped at `delegation.max_iterations` (default 50) — total iterations across parent + subagents can exceed the parent's cap - At 100%, the agent stops and returns a summary of work done ### Fallback Model @@ -224,7 +221,7 @@ After each turn: | File | Purpose | |------|---------| -| `run_agent.py` | AIAgent class — the complete agent loop (~9,200 lines) | +| `run_agent.py` | AIAgent class — the complete agent loop (~10,700 lines) | | `agent/prompt_builder.py` | System prompt assembly from memory, skills, context files, personality | | `agent/context_engine.py` | ContextEngine ABC — pluggable context management | | `agent/context_compressor.py` | Default engine — lossy summarization algorithm | diff --git a/website/docs/developer-guide/architecture.md b/website/docs/developer-guide/architecture.md index 53d8d72f7b..9e1d771ae3 100644 --- a/website/docs/developer-guide/architecture.md +++ b/website/docs/developer-guide/architecture.md @@ -32,8 +32,8 @@ This page is the top-level map of Hermes Agent internals. Use it to orient yours │ ┌──────┴───────┐ ┌──────┴───────┐ ┌──────┴───────┐ │ │ │ Compression │ │ 3 API Modes │ │ Tool Registry│ │ │ │ & Caching │ │ chat_compl. │ │ (registry.py)│ │ -│ │ │ │ codex_resp. │ │ 48 tools │ │ -│ │ │ │ anthropic │ │ 40 toolsets │ │ +│ │ │ │ codex_resp. │ │ 47 tools │ │ +│ │ │ │ anthropic │ │ 19 toolsets │ │ │ └──────────────┘ └──────────────┘ └──────────────┘ │ └─────────────────────────────────────────────────────────────────────┘ │ │ @@ -52,8 +52,8 @@ This page is the top-level map of Hermes Agent internals. 
Use it to orient yours ```text hermes-agent/ -├── run_agent.py # AIAgent — core conversation loop (~9,200 lines) -├── cli.py # HermesCLI — interactive terminal UI (~8,500 lines) +├── run_agent.py # AIAgent — core conversation loop (~10,700 lines) +├── cli.py # HermesCLI — interactive terminal UI (~10,000 lines) ├── model_tools.py # Tool discovery, schema collection, dispatch ├── toolsets.py # Tool groupings and platform presets ├── hermes_state.py # SQLite session/state database with FTS5 @@ -76,7 +76,7 @@ hermes-agent/ │ └── trajectory.py # Trajectory saving helpers │ ├── hermes_cli/ # CLI subcommands and setup -│ ├── main.py # Entry point — all `hermes` subcommands (~5,500 lines) +│ ├── main.py # Entry point — all `hermes` subcommands (~6,000 lines) │ ├── config.py # DEFAULT_CONFIG, OPTIONAL_ENV_VARS, migration │ ├── commands.py # COMMAND_REGISTRY — central slash command definitions │ ├── auth.py # PROVIDER_REGISTRY, credential resolution @@ -99,7 +99,7 @@ hermes-agent/ │ ├── process_registry.py # Background process management │ ├── file_tools.py # read_file, write_file, patch, search_files │ ├── web_tools.py # web_search, web_extract -│ ├── browser_tool.py # 11 browser automation tools +│ ├── browser_tool.py # 10 browser automation tools │ ├── code_execution_tool.py # execute_code sandbox │ ├── delegate_tool.py # Subagent delegation │ ├── mcp_tool.py # MCP client (~2,200 lines) @@ -109,7 +109,7 @@ hermes-agent/ │ └── environments/ # Terminal backends (local, docker, ssh, modal, daytona, singularity) │ ├── gateway/ # Messaging platform gateway -│ ├── run.py # GatewayRunner — message dispatch (~7,500 lines) +│ ├── run.py # GatewayRunner — message dispatch (~9,000 lines) │ ├── session.py # SessionStore — conversation persistence │ ├── delivery.py # Outbound message delivery │ ├── pairing.py # DM pairing authorization @@ -117,9 +117,10 @@ hermes-agent/ │ ├── mirror.py # Cross-session message mirroring │ ├── status.py # Token locks, profile-scoped process tracking │ 
├── builtin_hooks/ # Always-registered hooks -│ └── platforms/ # 15 adapters: telegram, discord, slack, whatsapp, +│ └── platforms/ # 18 adapters: telegram, discord, slack, whatsapp, │ # signal, matrix, mattermost, email, sms, -│ # dingtalk, feishu, wecom, weixin, bluebubbles, homeassistant, webhook +│ # dingtalk, feishu, wecom, wecom_callback, weixin, +│ # bluebubbles, homeassistant, webhook, api_server │ ├── acp_adapter/ # ACP server (VS Code / Zed / JetBrains) ├── cron/ # Scheduler (jobs.py, scheduler.py) @@ -211,7 +212,7 @@ A shared runtime resolver used by CLI, gateway, cron, ACP, and auxiliary calls. ### Tool System -Central tool registry (`tools/registry.py`) with 47 registered tools across 20 toolsets. Each tool file self-registers at import time. The registry handles schema collection, dispatch, availability checking, and error wrapping. Terminal tools support 6 backends (local, Docker, SSH, Daytona, Modal, Singularity). +Central tool registry (`tools/registry.py`) with 47 registered tools across 19 toolsets. Each tool file self-registers at import time. The registry handles schema collection, dispatch, availability checking, and error wrapping. Terminal tools support 6 backends (local, Docker, SSH, Daytona, Modal, Singularity). → [Tools Runtime](./tools-runtime.md) @@ -223,7 +224,7 @@ SQLite-based session storage with FTS5 full-text search. Sessions have lineage t ### Messaging Gateway -Long-running process with 14 platform adapters, unified session routing, user authorization (allowlists + DM pairing), slash command dispatch, hook system, cron ticking, and background maintenance. +Long-running process with 18 platform adapters, unified session routing, user authorization (allowlists + DM pairing), slash command dispatch, hook system, cron ticking, and background maintenance. 
→ [Gateway Internals](./gateway-internals.md) diff --git a/website/docs/developer-guide/context-compression-and-caching.md b/website/docs/developer-guide/context-compression-and-caching.md index c500612f65..29008ebb7d 100644 --- a/website/docs/developer-guide/context-compression-and-caching.md +++ b/website/docs/developer-guide/context-compression-and-caching.md @@ -53,7 +53,7 @@ Hermes has two separate compression layers that operate independently: ### 1. Gateway Session Hygiene (85% threshold) -Located in `gateway/run.py` (search for `_maybe_compress_session`). This is a **safety net** that +Located in `gateway/run.py` (search for `Session hygiene: auto-compress`). This is a **safety net** that runs before the agent processes a message. It prevents API failures when sessions grow too large between turns (e.g., overnight accumulation in Telegram/Discord). diff --git a/website/docs/developer-guide/cron-internals.md b/website/docs/developer-guide/cron-internals.md index 5eddcb7e8e..5d1cdc39c9 100644 --- a/website/docs/developer-guide/cron-internals.md +++ b/website/docs/developer-guide/cron-internals.md @@ -37,16 +37,25 @@ Jobs are stored in `~/.hermes/cron/jobs.json` with atomic write semantics (write ```json { - "id": "job_abc123", + "id": "a1b2c3d4e5f6", "name": "Daily briefing", "prompt": "Summarize today's AI news and funding rounds", - "schedule": "0 9 * * *", + "schedule": { + "kind": "cron", + "expr": "0 9 * * *", + "display": "0 9 * * *" + }, "skills": ["ai-funding-daily-report"], "deliver": "telegram:-1001234567890", - "repeat": null, + "repeat": { + "times": null, + "completed": 42 + }, "state": "scheduled", - "next_run": "2025-01-16T09:00:00Z", - "run_count": 42, + "enabled": true, + "next_run_at": "2025-01-16T09:00:00Z", + "last_run_at": "2025-01-15T09:00:00Z", + "last_status": "ok", "created_at": "2025-01-01T00:00:00Z", "model": null, "provider": null, diff --git a/website/docs/developer-guide/gateway-internals.md 
b/website/docs/developer-guide/gateway-internals.md index 0d97f13226..997930c0a4 100644 --- a/website/docs/developer-guide/gateway-internals.md +++ b/website/docs/developer-guide/gateway-internals.md @@ -12,7 +12,7 @@ The messaging gateway is the long-running process that connects Hermes to 14+ ex | File | Purpose | |------|---------| -| `gateway/run.py` | `GatewayRunner` — main loop, slash commands, message dispatch (~7,500 lines) | +| `gateway/run.py` | `GatewayRunner` — main loop, slash commands, message dispatch (~9,000 lines) | | `gateway/session.py` | `SessionStore` — conversation persistence and session key construction | | `gateway/delivery.py` | Outbound message delivery to target platforms/channels | | `gateway/pairing.py` | DM pairing flow for user authorization | diff --git a/website/docs/getting-started/installation.md b/website/docs/getting-started/installation.md index 5bdb6809e7..a28b1256e6 100644 --- a/website/docs/getting-started/installation.md +++ b/website/docs/getting-started/installation.md @@ -135,7 +135,7 @@ uv pip install -e "." | Extra | What it adds | Install command | |-------|-------------|-----------------| | `all` | Everything below | `uv pip install -e ".[all]"` | -| `messaging` | Telegram & Discord gateway | `uv pip install -e ".[messaging]"` | +| `messaging` | Telegram, Discord & Slack gateway | `uv pip install -e ".[messaging]"` | | `cron` | Cron expression parsing for scheduled tasks | `uv pip install -e ".[cron]"` | | `cli` | Terminal menu UI for setup wizard | `uv pip install -e ".[cli]"` | | `modal` | Modal cloud execution backend | `uv pip install -e ".[modal]"` | diff --git a/website/docs/getting-started/quickstart.md b/website/docs/getting-started/quickstart.md index 9646fbcc9f..c16aa30787 100644 --- a/website/docs/getting-started/quickstart.md +++ b/website/docs/getting-started/quickstart.md @@ -152,9 +152,7 @@ Want microphone input in the CLI or spoken replies in messaging? 
```bash pip install "hermes-agent[voice]" - -# Optional but recommended for free local speech-to-text -pip install faster-whisper +# Includes faster-whisper for free local speech-to-text ``` Then start Hermes and enable it inside the CLI: diff --git a/website/docs/guides/automate-with-cron.md b/website/docs/guides/automate-with-cron.md index fba8a08284..b35897e897 100644 --- a/website/docs/guides/automate-with-cron.md +++ b/website/docs/guides/automate-with-cron.md @@ -254,7 +254,7 @@ The `--deliver` flag controls where results go: **Test with `/cron run`.** Before waiting for the schedule to trigger, use `/cron run <job-id>` to execute immediately and verify the output looks right. -**Schedule expressions.** Human-readable formats like `every 2h`, `30m`, and `daily at 9am` all work alongside standard cron expressions like `0 9 * * *`. +**Schedule expressions.** Supported formats: relative delays (`30m`), intervals (`every 2h`), standard cron expressions (`0 9 * * *`), and ISO timestamps (`2025-06-15T09:00:00`). Natural language like `daily at 9am` is not supported — use `0 9 * * *` instead.
--- diff --git a/website/docs/guides/delegation-patterns.md b/website/docs/guides/delegation-patterns.md index e2eaa05cb7..4de7ebbd9e 100644 --- a/website/docs/guides/delegation-patterns.md +++ b/website/docs/guides/delegation-patterns.md @@ -216,7 +216,7 @@ Restricting toolsets keeps the subagent focused and prevents accidental side eff ## Constraints -- **Max 3 parallel tasks** — batches are capped at 3 concurrent subagents +- **Default 3 parallel tasks** — batches default to 3 concurrent subagents (configurable via `delegation.max_concurrent_children` in config.yaml) - **No nesting** — subagents cannot call `delegate_task`, `clarify`, `memory`, `send_message`, or `execute_code` - **Separate terminals** — each subagent gets its own terminal session with separate working directory and state - **No conversation history** — subagents see only what you put in `goal` and `context` diff --git a/website/docs/reference/cli-commands.md b/website/docs/reference/cli-commands.md index 95416d2127..a4b589379b 100644 --- a/website/docs/reference/cli-commands.md +++ b/website/docs/reference/cli-commands.md @@ -60,7 +60,10 @@ hermes [global-options] [subcommand/options] | `hermes sessions` | Browse, export, prune, rename, and delete sessions. | | `hermes insights` | Show token/cost/activity analytics. | | `hermes claw` | OpenClaw migration helpers. | -| `hermes web` | Launch the web dashboard for managing config, API keys, and sessions. | +| `hermes dashboard` | Launch the web dashboard for managing config, API keys, and sessions. | +| `hermes debug` | Debug tools — upload logs and system info for support. | +| `hermes backup` | Back up Hermes home directory to a zip file. | +| `hermes import` | Restore a Hermes backup from a zip file. | | `hermes profile` | Manage profiles — multiple isolated Hermes instances. | | `hermes completion` | Print shell completion scripts (bash/zsh). | | `hermes version` | Show version information. 
| @@ -80,10 +83,11 @@ Common options: | `-q`, `--query "..."` | One-shot, non-interactive prompt. | | `-m`, `--model <model>` | Override the model for this run. | | `-t`, `--toolsets <list>` | Enable a comma-separated set of toolsets. | -| `--provider <name>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `deepseek`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `alibaba`. | +| `--provider <name>` | Force a provider: `auto`, `openrouter`, `nous`, `openai-codex`, `copilot-acp`, `copilot`, `anthropic`, `gemini`, `huggingface`, `zai`, `kimi-coding`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`. | | `-s`, `--skills <skill>` | Preload one or more skills for the session (can be repeated or comma-separated). | | `-v`, `--verbose` | Verbose output. | | `-Q`, `--quiet` | Programmatic mode: suppress banner/spinner/tool previews. | +| `--image <path>` | Attach a local image to a single query. | | `--resume <id>` / `--continue [name]` | Resume a session directly from `chat`. | | `--worktree` | Create an isolated git worktree for this run. | | `--checkpoints` | Enable filesystem checkpoints before destructive file changes. | @@ -160,7 +164,7 @@ Use `hermes gateway run` instead of `hermes gateway start` — WSL's systemd sup ## `hermes setup` ```bash -hermes setup [model|terminal|gateway|tools|agent] [--non-interactive] [--reset] +hermes setup [model|tts|terminal|gateway|tools|agent] [--non-interactive] [--reset] ``` Use the full wizard or jump into one section: @@ -448,6 +452,7 @@ View, tail, and filter Hermes log files. All logs are stored in `~/.hermes/logs/ | `--level <level>` | Minimum log level to show: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`. | | `--session <id>` | Filter lines containing a session ID substring. | | `--since