diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index c5fd9a20aee..4c88772327f 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -1680,26 +1680,48 @@ def _read_main_provider() -> str: # per turn — no lock needed. Cleared by ``clear_runtime_main()``. _RUNTIME_MAIN_PROVIDER: str = "" _RUNTIME_MAIN_MODEL: str = "" +_RUNTIME_MAIN_BASE_URL: str = "" +_RUNTIME_MAIN_API_KEY: str = "" +_RUNTIME_MAIN_API_MODE: str = "" -def set_runtime_main(provider: str, model: str) -> None: - """Record the live runtime provider/model for the current AIAgent. +def set_runtime_main( + provider: str, + model: str, + *, + base_url: str = "", + api_key: str = "", + api_mode: str = "", +) -> None: + """Record the live runtime provider/model/credentials for the current AIAgent. Called by ``run_agent.AIAgent._sync_runtime_main_for_aux_routing`` (or equivalent setter) at the top of each turn so that ``_read_main_provider`` / ``_read_main_model`` reflect CLI/gateway overrides instead of the stale config.yaml default. + + For ``custom:`` providers, ``base_url`` and ``api_key`` must also be + recorded so that ``_resolve_auto`` can construct a valid client in + Step 1 instead of falling through to the aggregator chain. """ global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL + global _RUNTIME_MAIN_BASE_URL, _RUNTIME_MAIN_API_KEY, _RUNTIME_MAIN_API_MODE _RUNTIME_MAIN_PROVIDER = (provider or "").strip().lower() _RUNTIME_MAIN_MODEL = (model or "").strip() + _RUNTIME_MAIN_BASE_URL = (base_url or "").strip() + _RUNTIME_MAIN_API_KEY = api_key.strip() if isinstance(api_key, str) else "" + _RUNTIME_MAIN_API_MODE = (api_mode or "").strip() def clear_runtime_main() -> None: """Clear the runtime override (e.g. on session end).""" global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL + global _RUNTIME_MAIN_BASE_URL, _RUNTIME_MAIN_API_KEY, _RUNTIME_MAIN_API_MODE _RUNTIME_MAIN_PROVIDER = "" _RUNTIME_MAIN_MODEL = "" + _RUNTIME_MAIN_BASE_URL = "" + _RUNTIME_MAIN_API_KEY = "" + _RUNTIME_MAIN_API_MODE = "" def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[str]]: @@ -2980,6 +3002,18 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option runtime_api_key = runtime.get("api_key", "") runtime_api_mode = str(runtime.get("api_mode") or "") + # Fall back to process-local globals when main_runtime dict was not + # provided or was incomplete. ``set_runtime_main()`` now records + # base_url/api_key/api_mode alongside provider/model, so custom: + # providers get the full credential surface in Step 1 of the + # auto-detect chain. + if not runtime_base_url and _RUNTIME_MAIN_BASE_URL: + runtime_base_url = _RUNTIME_MAIN_BASE_URL + if not runtime_api_key and _RUNTIME_MAIN_API_KEY: + runtime_api_key = _RUNTIME_MAIN_API_KEY + if not runtime_api_mode and _RUNTIME_MAIN_API_MODE: + runtime_api_mode = _RUNTIME_MAIN_API_MODE + # ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named # provider (not 'custom'). This catches the common "env poisoning" # scenario where a user switches providers via `hermes model` but the diff --git a/agent/chat_completion_helpers.py b/agent/chat_completion_helpers.py index 0785347d2c9..cc7427950b2 100644 --- a/agent/chat_completion_helpers.py +++ b/agent/chat_completion_helpers.py @@ -1283,6 +1283,18 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str: agent._copy_reasoning_content_for_api(msg, api_msg) for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"): api_msg.pop(internal_field, None) + # Strict OpenAI-compatible gateways (Fireworks-backed OpenCode Go, + # Mistral, Moonshot/Kimi) reject any message key outside the Chat + # Completions schema. The main loop drops these via + # ChatCompletionsTransport.convert_messages(), but the summary path + # hand-builds messages and calls chat.completions.create() directly, + # bypassing the transport — so mirror that sanitization here: + # tool_name (SQLite FTS bookkeeping), the codex_* reasoning carriers, + # and every Hermes-internal underscore-prefixed scaffolding key. + for schema_foreign in ("tool_name", "codex_reasoning_items", "codex_message_items"): + api_msg.pop(schema_foreign, None) + for internal_key in [k for k in api_msg if isinstance(k, str) and k.startswith("_")]: + api_msg.pop(internal_key, None) if _needs_sanitize: agent._sanitize_tool_calls_for_strict_api(api_msg) api_messages.append(api_msg) diff --git a/agent/context_compressor.py b/agent/context_compressor.py index cf9c534decd..079c4b0b560 100644 --- a/agent/context_compressor.py +++ b/agent/context_compressor.py @@ -40,17 +40,47 @@ SUMMARY_PREFIX = ( "window — treat it as background reference, NOT as active instructions. " "Do NOT answer questions or fulfill requests mentioned in this summary; " "they were already addressed. " - "Your current task is identified in the '## Active Task' section of the " - "summary — resume exactly from there. " + "Respond ONLY to the latest user message that appears AFTER this " + "summary — that message is the single source of truth for what to do " + "right now. " + "If the latest user message is consistent with the '## Active Task' " + "section, you may use the summary as background. If the latest user " + "message contradicts, supersedes, changes topic from, or in any way " + "diverges from '## Active Task' / '## In Progress' / '## Pending User " + "Asks' / '## Remaining Work', the latest message WINS — discard those " + "stale items entirely and do not 'wrap up the old task first'. " + "Reverse signals in the latest message (e.g. 'stop', 'undo', 'roll " + "back', 'just verify', 'don't do that anymore', 'never mind', a new " + "topic) must immediately end any in-flight work described in the " + "summary; do not re-surface it in later turns. " "IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system " "prompt is ALWAYS authoritative and active — never ignore or deprioritize " "memory content due to this compaction note. " - "Respond ONLY to the latest user message " - "that appears AFTER this summary. The current session state (files, " - "config, etc.) may reflect work described here — avoid repeating it:" + "The current session state (files, config, etc.) may reflect work " + "described here — avoid repeating it:" ) LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:" +# Handoff prefixes that shipped in earlier releases. A summary persisted under +# one of these can be inherited into a resumed lineage (#35344); when it is +# re-normalized on re-compaction we must strip the OLD prefix too, otherwise the +# stale directive it carried (e.g. "resume exactly from Active Task") survives +# embedded in the body and keeps hijacking replies. Keep newest-first; entries +# are matched literally. Add a frozen copy here whenever SUMMARY_PREFIX changes. +_HISTORICAL_SUMMARY_PREFIXES = ( + # Pre-#35344: contained the self-contradicting "resume exactly" directive. + "[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted " + "into the summary below. This is a handoff from a previous context " + "window — treat it as background reference, NOT as active instructions. " + "Do NOT answer questions or fulfill requests mentioned in this summary; " + "they were already addressed. " + "Your current task is identified in the '## Active Task' section of the " + "summary — resume exactly from there. " + "Respond ONLY to the latest user message " + "that appears AFTER this summary. The current session state (files, " + "config, etc.) may reflect work described here — avoid repeating it:", +) + # Minimum tokens for the summary output _MIN_SUMMARY_TOKENS = 2000 # Proportion of compressed content to allocate for summary @@ -1236,11 +1266,27 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb # Shared structured template (used by both paths). _template_sections = f"""## Active Task -[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or -task assignment verbatim — the exact words they used. If multiple tasks -were requested and only some are done, list only the ones NOT yet completed. -Continuation should pick up exactly here. Example: +[THE SINGLE MOST IMPORTANT FIELD. Capture the user's most recent unfulfilled +input verbatim — the exact words they used. This includes: +- Explicit task assignments ("refactor the auth module") +- Questions awaiting an answer ("waarom staat X op Y?", "wat zijn de volgende stappen?") +- Decisions awaiting input ("optie A of B?") +- Ongoing discussions where the assistant owes the next substantive reply +A conversation where the user just asked a question IS an active task — the +task is "answer that question with full context". Do NOT write "None" merely +because the user did not issue an imperative command; reserve "None" for the +rare case where the last exchange was fully resolved and the user said +something like "thanks, that's all". +If multiple items are outstanding, list only the ones NOT yet completed. +Continuation should pick up exactly here. Examples: "User asked: 'Now refactor the auth module to use JWT instead of sessions'" +"User asked: 'Waarom stond provider ineens op openrouter?' — needs investigation + answer" +"User chose option A; awaiting implementation of step 2" +If the user's most recent message was a reverse signal (stop, undo, roll +back, never mind, just verify, change of topic) that supersedes earlier +work, write the reverse signal verbatim and DO NOT carry forward the +cancelled task. Example: "User asked: 'Stop the i18n refactor and just +verify the current diff' — earlier i18n in-flight work is cancelled." If no outstanding task exists, write "None."] ## Goal @@ -1306,7 +1352,7 @@ PREVIOUS SUMMARY: NEW TURNS TO INCORPORATE: {content_to_summarize} -Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity. +Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled input — this includes any question, decision request, or discussion turn that the assistant has not yet answered. Only write "None" if the last exchange was fully resolved. {_template_sections}""" else: @@ -1470,9 +1516,16 @@ The user has requested that this compaction PRIORITISE preserving all informatio @staticmethod def _strip_summary_prefix(summary: str) -> str: - """Return summary body without the current or legacy handoff prefix.""" + """Return summary body without the current, legacy, or any historical + handoff prefix. + + Historical prefixes must be stripped too: a handoff persisted under an + older prefix can be inherited into a resumed lineage (#35344), and if we + only re-prepend the current prefix without removing the old one, the + stale directive it carried stays embedded in the body. + """ text = (summary or "").strip() - for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX): + for prefix in (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES): if text.startswith(prefix): return text[len(prefix):].lstrip() return text @@ -1486,7 +1539,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio @staticmethod def _is_context_summary_content(content: Any) -> bool: text = _content_text_for_contains(content).lstrip() - return text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX) + if text.startswith(SUMMARY_PREFIX) or text.startswith(LEGACY_SUMMARY_PREFIX): + return True + return any(text.startswith(p) for p in _HISTORICAL_SUMMARY_PREFIXES) @classmethod def _find_latest_context_summary( diff --git a/agent/context_engine.py b/agent/context_engine.py index bb426fc189d..79c31fb48e6 100644 --- a/agent/context_engine.py +++ b/agent/context_engine.py @@ -115,6 +115,15 @@ class ContextEngine(ABC): """ return False + def should_defer_preflight_to_real_usage(self, rough_tokens: int) -> bool: + """Return True when preflight should trust recent real usage instead. + + Built-in compression uses this to avoid re-compacting from known-noisy + rough estimates after a compressed request has already fit. Third-party + engines can ignore it safely. + """ + return False + # -- Optional: manual /compress preflight ------------------------------ def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool: diff --git a/agent/conversation_compression.py b/agent/conversation_compression.py index 9a93ba4a496..ba8678cc723 100644 --- a/agent/conversation_compression.py +++ b/agent/conversation_compression.py @@ -575,19 +575,18 @@ def compress_context( force=True, ) - # Update token estimate after compaction so pressure calculations - # use the post-compression count, not the stale pre-compression one. - # Use estimate_request_tokens_rough() so tool schemas are included — - # with 50+ tools enabled, schemas alone can add 20-30K tokens, and - # omitting them delays the next compression cycle far past the - # configured threshold (issue #14695). + # Keep the post-compression rough estimate for diagnostics, but do not + # treat it as provider-reported prompt usage. Schema-heavy rough estimates + # can remain above threshold even after the next real API request fits. _compressed_est = estimate_request_tokens_rough( compressed, system_prompt=new_system_prompt or "", tools=agent.tools or None, ) - agent.context_compressor.last_prompt_tokens = _compressed_est + agent.context_compressor.last_compression_rough_tokens = _compressed_est + agent.context_compressor.last_prompt_tokens = -1 agent.context_compressor.last_completion_tokens = 0 + agent.context_compressor.awaiting_real_usage_after_compression = True # Clear the file-read dedup cache. After compression the original # read content is summarised away — if the model re-reads the same @@ -599,7 +598,7 @@ def compress_context( pass logger.info( - "context compression done: session=%s messages=%d->%d tokens=~%s", + "context compression done: session=%s messages=%d->%d rough_tokens=~%s awaiting_real_usage=true", agent.session_id or "none", _pre_msg_count, len(compressed), f"{_compressed_est:,}", ) diff --git a/agent/conversation_loop.py b/agent/conversation_loop.py index a6c975be391..bb6c6229cdb 100644 --- a/agent/conversation_loop.py +++ b/agent/conversation_loop.py @@ -392,6 +392,9 @@ def run_conversation( set_runtime_main( getattr(agent, "provider", "") or "", getattr(agent, "model", "") or "", + base_url=getattr(agent, "base_url", "") or "", + api_key=getattr(agent, "api_key", "") or "", + api_mode=getattr(agent, "api_mode", "") or "", ) except Exception: pass @@ -600,18 +603,50 @@ def run_conversation( system_prompt=active_system_prompt or "", tools=agent.tools or None, ) + _compressor = agent.context_compressor + _defer_preflight = getattr( + _compressor, + "should_defer_preflight_to_real_usage", + lambda _tokens: False, + ) + _preflight_deferred = _defer_preflight(_preflight_tokens) - if agent.context_compressor.should_compress(_preflight_tokens): + if not _preflight_deferred: + # Keep the CLI/ACP context display in sync with what preflight + # actually measured. The status bar reads + # ``compressor.last_prompt_tokens``, which otherwise only updates + # from a *successful* API response. When the conversation has grown + # since the last successful call — or when compression then fails + # (e.g. the auxiliary summary model times out) and no fresh usage + # arrives — the bar stays stuck at the old, smaller value while + # preflight reports a much larger number, looking out of sync. + # Seed it with the fresh estimate (only ever revising upward; a real + # ``update_from_response`` will correct it after the next API call). + # Skipped when deferring — a deferred estimate is known to over-count + # vs the last real provider prompt, so trusting it for the display + # would re-introduce the very desync we're avoiding. + if _preflight_tokens > (_compressor.last_prompt_tokens or 0): + _compressor.last_prompt_tokens = _preflight_tokens + + if _preflight_deferred: + logger.info( + "Skipping preflight compression: rough estimate ~%s >= %s, " + "but last real provider prompt was %s after compression", + f"{_preflight_tokens:,}", + f"{_compressor.threshold_tokens:,}", + f"{_compressor.last_real_prompt_tokens:,}", + ) + elif _compressor.should_compress(_preflight_tokens): logger.info( "Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)", f"{_preflight_tokens:,}", - f"{agent.context_compressor.threshold_tokens:,}", + f"{_compressor.threshold_tokens:,}", agent.model, - f"{agent.context_compressor.context_length:,}", + f"{_compressor.context_length:,}", ) agent._emit_status( f"📦 Preflight compression: ~{_preflight_tokens:,} tokens " - f">= {agent.context_compressor.threshold_tokens:,} threshold. " + f">= {_compressor.threshold_tokens:,} threshold. " "This may take a moment." ) # May need multiple passes for very large sessions with small @@ -646,8 +681,8 @@ def run_conversation( system_prompt=active_system_prompt or "", tools=agent.tools or None, ) - if _preflight_tokens < agent.context_compressor.threshold_tokens: - break # Under threshold + if not _compressor.should_compress(_preflight_tokens): + break # Under threshold or anti-thrash guard stopped it # Plugin hook: pre_llm_call # Fired once per turn before the tool-calling loop. Plugins can @@ -1457,7 +1492,8 @@ def run_conversation( if retry_count >= max_retries: # Try fallback before giving up - agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...") + if agent._has_pending_fallback(): + agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...") if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 @@ -3059,12 +3095,17 @@ def run_conversation( ) and not is_context_length_error if is_client_error: - # Try fallback before aborting — a different provider - # may not have the same issue (rate limit, auth, etc.) - if classified.reason == FailoverReason.content_policy_blocked: - agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...") - else: - agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...") + # Try fallback before aborting — a different provider may + # not have the same issue (rate limit, auth, etc.). Only + # announce the attempt when a fallback chain actually + # exists; otherwise "trying fallback..." is a lie and the + # session looks like it's recovering when it's about to + # abort silently (#35314, #17446). + if agent._has_pending_fallback(): + if classified.reason == FailoverReason.content_policy_blocked: + agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...") + else: + agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...") if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 @@ -3207,7 +3248,8 @@ def run_conversation( retry_count = 0 continue # Try fallback before giving up entirely - agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...") + if agent._has_pending_fallback(): + agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...") if agent._try_activate_fallback(): retry_count = 0 compression_attempts = 0 @@ -3862,6 +3904,11 @@ def run_conversation( # inflate completion_tokens with reasoning, # causing premature compression. (#12026) _real_tokens = _compressor.last_prompt_tokens + elif _compressor.last_prompt_tokens == -1: + # Compression just ran and no API-reported prompt count + # has arrived yet. Avoid treating a schema-heavy rough + # post-compression estimate as real context pressure. + _real_tokens = 0 else: # Include tool schemas — with 50+ tools enabled # these add 20-30K tokens the messages-only @@ -4443,6 +4490,55 @@ def run_conversation( except Exception as _ver_err: logger.debug("file-mutation verifier footer failed: %s", _ver_err) + # Turn-completion explainer. + # When a turn ends abnormally after substantive work — empty content + # after retries, a partial/truncated stream, a still-pending tool + # result, or an iteration/budget limit — the user otherwise gets a + # blank or fragmentary response box with no consolidated reason why + # the agent stopped (#34452). Surface a single user-visible + # explanation derived from ``_turn_exit_reason``, mirroring the + # file-mutation verifier footer pattern above. + # + # Gate carefully so healthy turns stay quiet: + # - ``text_response(...)`` exits never produce an explanation + # (handled inside the formatter), so a terse ``Done.`` is silent. + # - We only ACT when there is no genuinely usable reply this turn: + # an empty response, the "(empty)" terminal sentinel, or a + # suspiciously short partial fragment with no terminating + # punctuation (e.g. "The"). A real short answer keeps its text. + if not interrupted: + try: + if agent._turn_completion_explainer_enabled(): + _stripped = (final_response or "").strip() + _is_empty_terminal = _stripped == "" or _stripped == "(empty)" + # A short fragment that is not a normal text_response exit + # and lacks sentence-ending punctuation is treated as a + # truncated partial (the "The" case from #34452). + _is_partial_fragment = ( + not _is_empty_terminal + and not str(_turn_exit_reason).startswith("text_response") + and len(_stripped) <= 24 + and _stripped[-1:] not in {".", "!", "?", "。", "!", "?", "`", ")"} + ) + if _is_empty_terminal or _is_partial_fragment: + _explanation = agent._format_turn_completion_explanation( + _turn_exit_reason + ) + if _explanation: + if _is_empty_terminal: + # Replace the bare "(empty)"/blank sentinel with + # the actionable explanation. + final_response = _explanation + else: + # Keep the partial fragment, append the reason so + # the user sees both what arrived and why it + # stopped. + final_response = ( + _stripped + "\n\n" + _explanation + ) + except Exception as _exp_err: + logger.debug("turn-completion explainer failed: %s", _exp_err) + _response_transformed = False # Plugin hook: transform_llm_output diff --git a/agent/lsp/cli.py b/agent/lsp/cli.py index 121cfa5f92c..139baa213f7 100644 --- a/agent/lsp/cli.py +++ b/agent/lsp/cli.py @@ -247,18 +247,13 @@ def _cmd_restart() -> int: def _cmd_which(server_id: str) -> int: - from agent.lsp.install import INSTALL_RECIPES, hermes_lsp_bin_dir - import shutil as _shutil + from agent.lsp.install import INSTALL_RECIPES, _existing_binary recipe = INSTALL_RECIPES.get(server_id) bin_name = (recipe or {}).get("bin", server_id) - staged = hermes_lsp_bin_dir() / bin_name - if staged.exists(): - sys.stdout.write(str(staged) + "\n") - return 0 - on_path = _shutil.which(bin_name) - if on_path: - sys.stdout.write(on_path + "\n") + resolved = _existing_binary(bin_name) + if resolved: + sys.stdout.write(resolved + "\n") return 0 sys.stderr.write(f"{server_id}: not installed\n") return 1 @@ -292,11 +287,9 @@ def _backend_warnings() -> list: suggestion across common platforms. """ import shutil as _shutil - from agent.lsp.install import hermes_lsp_bin_dir + from agent.lsp.install import _existing_binary notes: list = [] - bash_installed = _shutil.which("bash-language-server") is not None or ( - (hermes_lsp_bin_dir() / "bash-language-server").exists() - ) + bash_installed = _existing_binary("bash-language-server") is not None if bash_installed and _shutil.which("shellcheck") is None: notes.append( "bash-language-server is installed but shellcheck is missing — " diff --git a/agent/lsp/client.py b/agent/lsp/client.py index 06a92ae351b..c135e554c5d 100644 --- a/agent/lsp/client.py +++ b/agent/lsp/client.py @@ -44,6 +44,7 @@ from __future__ import annotations import asyncio import logging import os +import sys from pathlib import Path from typing import Any, Awaitable, Callable, Dict, List, Optional, Set from urllib.parse import quote, unquote @@ -244,15 +245,27 @@ class LSPClient: await self._cleanup_process() raise + @staticmethod + def _win_wrap_cmd(cmd: List[str]) -> List[str]: + """On Windows, wrap .cmd/.bat shims so CreateProcess can run them.""" + exe = cmd[0] + if exe.lower().endswith((".cmd", ".bat")): + return ["cmd.exe", "/c", *cmd] + return cmd + async def _spawn(self) -> None: env = dict(os.environ) if self._env: env.update(self._env) + cmd = self._command + if sys.platform == "win32": + cmd = self._win_wrap_cmd(cmd) + try: self._proc = await asyncio.create_subprocess_exec( - self._command[0], - *self._command[1:], + cmd[0], + *cmd[1:], stdin=asyncio.subprocess.PIPE, stdout=asyncio.subprocess.PIPE, stderr=asyncio.subprocess.PIPE, @@ -261,7 +274,7 @@ class LSPClient: ) except FileNotFoundError as e: raise LSPProtocolError( - f"LSP server binary not found: {self._command[0]} ({e})" + f"LSP server binary not found: {cmd[0]} ({e})" ) from e # Drain stderr at debug level — if we don't, the pipe buffer diff --git a/agent/lsp/install.py b/agent/lsp/install.py index d4a80ec195e..9193b0375c0 100644 --- a/agent/lsp/install.py +++ b/agent/lsp/install.py @@ -108,6 +108,11 @@ INSTALL_RECIPES: Dict[str, Dict[str, Any]] = { _install_locks: Dict[str, threading.Lock] = {} _install_results: Dict[str, Optional[str]] = {} _install_lock_meta = threading.Lock() +_WINDOWS_WRAPPER_SUFFIXES = (".cmd", ".exe", ".bat") + + +def _is_windows() -> bool: + return os.name == "nt" def hermes_lsp_bin_dir() -> Path: @@ -120,14 +125,33 @@ def hermes_lsp_bin_dir() -> Path: return p +def _native_binary_candidates(base: Path) -> list[Path]: + """Return platform-native executable candidates for a staged binary.""" + candidates = [base] + if _is_windows(): + existing = {str(base).lower()} + for suffix in _WINDOWS_WRAPPER_SUFFIXES: + candidate = Path(str(base) + suffix) + key = str(candidate).lower() + if key not in existing: + candidates.append(candidate) + existing.add(key) + return candidates + + def _existing_binary(name: str) -> Optional[str]: """Probe the staging dir + PATH for a binary named ``name``.""" - staged = hermes_lsp_bin_dir() / name - if staged.exists() and os.access(staged, os.X_OK): - return str(staged) + for staged in _native_binary_candidates(hermes_lsp_bin_dir() / name): + if staged.exists() and os.access(staged, os.X_OK): + return str(staged) on_path = shutil.which(name) if on_path: return on_path + if _is_windows(): + for suffix in _WINDOWS_WRAPPER_SUFFIXES: + on_path = shutil.which(f"{name}{suffix}") + if on_path: + return on_path return None @@ -250,12 +274,7 @@ def _install_npm( # Find the bin nm_bin = staging / "node_modules" / ".bin" / bin_name - if os.name == "nt": - # On Windows npm sometimes drops `.cmd` shims - candidates = [nm_bin, nm_bin.with_suffix(".cmd")] - else: - candidates = [nm_bin] - for c in candidates: + for c in _native_binary_candidates(nm_bin): if c.exists(): # Symlink into our `lsp/bin/` for stable PATH access. link = hermes_lsp_bin_dir() / c.name @@ -301,7 +320,7 @@ def _install_go(pkg: str, bin_name: str) -> Optional[str]: logger.warning("[install] go install errored for %s: %s", pkg, e) return None bin_path = staging / bin_name - if os.name == "nt": + if _is_windows(): bin_path = bin_path.with_suffix(".exe") if bin_path.exists(): return str(bin_path) @@ -337,19 +356,24 @@ def _install_pip(pkg: str, bin_name: str) -> Optional[str]: except (subprocess.TimeoutExpired, OSError) as e: logger.warning("[install] pip install errored for %s: %s", pkg, e) return None - # Look for the script - bin_path = pip_target / "bin" / bin_name - if bin_path.exists(): - link = hermes_lsp_bin_dir() / bin_name - if not link.exists(): - try: - link.symlink_to(bin_path) - except (OSError, NotImplementedError): - try: - shutil.copy2(bin_path, link) - except OSError: - return str(bin_path) - return str(link if link.exists() else bin_path) + # Look for the console script. POSIX wheels generally write to bin/, + # while native Windows installs use Scripts/. + script_dirs = [pip_target / "bin"] + if _is_windows(): + script_dirs.append(pip_target / "Scripts") + for script_dir in script_dirs: + for bin_path in _native_binary_candidates(script_dir / bin_name): + if bin_path.exists(): + link = hermes_lsp_bin_dir() / bin_path.name + if not link.exists(): + try: + link.symlink_to(bin_path) + except (OSError, NotImplementedError): + try: + shutil.copy2(bin_path, link) + except OSError: + return str(bin_path) + return str(link if link.exists() else bin_path) return None diff --git a/agent/tool_executor.py b/agent/tool_executor.py index 358c1a0a8f7..bbbd239dff9 100644 --- a/agent/tool_executor.py +++ b/agent/tool_executor.py @@ -180,28 +180,9 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe except Exception: pass - # Checkpoint for file-mutating tools - if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled: - try: - file_path = function_args.get("path", "") - if file_path: - work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path) - agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}") - except Exception: - pass - - # Checkpoint before destructive terminal commands - if function_name == "terminal" and agent._checkpoint_mgr.enabled: - try: - cmd = function_args.get("command", "") - if _is_destructive_command(cmd): - cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd()) - agent._checkpoint_mgr.ensure_checkpoint( - cwd, f"before terminal: {cmd[:60]}" - ) - except Exception: - pass - + # ── Block evaluation (BEFORE checkpoint preflight) ─────────── + # We must know whether the tool will execute before touching + # checkpoint state (dedup slot, real snapshots). block_result = None blocked_by_guardrail = False if _ts_scope_block is not None: @@ -224,6 +205,30 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe block_result = agent._guardrail_block_result(guardrail_decision) blocked_by_guardrail = True + # ── Checkpoint preflight (only for tools that will execute) ── + if block_result is None: + # Checkpoint for file-mutating tools + if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled: + try: + file_path = function_args.get("path", "") + if file_path: + work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path) + agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}") + except Exception: + pass + + # Checkpoint before destructive terminal commands + if function_name == "terminal" and agent._checkpoint_mgr.enabled: + try: + cmd = function_args.get("command", "") + if _is_destructive_command(cmd): + cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd()) + agent._checkpoint_mgr.ensure_checkpoint( + cwd, f"before terminal: {cmd[:60]}" + ) + except Exception: + pass + parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail)) # ── Logging / callbacks ────────────────────────────────────────── @@ -301,33 +306,38 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe # submit site below (GHSA-qg5c-hvr5-hjgr, #13617). start = time.time() try: - result = agent._invoke_tool( - function_name, - function_args, - effective_task_id, - tool_call.id, - messages=messages, - pre_tool_block_checked=True, - ) - except Exception as tool_error: - result = f"Error executing tool '{function_name}': {tool_error}" - logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True) - duration = time.time() - start - is_error, _ = _detect_tool_failure(function_name, result) - if is_error: - logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200]) - else: - logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result)) - results[index] = (function_name, function_args, result, duration, is_error, False) - # Tear down worker-tid tracking. Clear any interrupt bit we may - # have set so the next task scheduled onto this recycled tid - # starts with a clean slate. - with agent._tool_worker_threads_lock: - agent._tool_worker_threads.discard(_worker_tid) - try: - _ra()._set_interrupt(False, _worker_tid) - except Exception: - pass + try: + result = agent._invoke_tool( + function_name, + function_args, + effective_task_id, + tool_call.id, + messages=messages, + pre_tool_block_checked=True, + ) + except Exception as tool_error: + result = f"Error executing tool '{function_name}': {tool_error}" + logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True) + duration = time.time() - start + is_error, _ = _detect_tool_failure(function_name, result) + if is_error: + logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200]) + else: + logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result)) + results[index] = (function_name, function_args, result, duration, is_error, False) + finally: + # Tear down worker-tid tracking. Clear any interrupt bit we may + # have set so the next task scheduled onto this recycled tid + # starts with a clean slate. This MUST be in a finally block + # because BaseException subclasses (CancelledError, KeyboardInterrupt) + # bypass ``except Exception`` and would otherwise leak the tid + # into _interrupted_threads, poisoning the recycled thread. + with agent._tool_worker_threads_lock: + agent._tool_worker_threads.discard(_worker_tid) + try: + _ra()._set_interrupt(False, _worker_tid) + except Exception: + pass # Start spinner for CLI mode (skip when TUI handles tool progress) spinner = None @@ -753,10 +763,14 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe elif function_name == "delegate_task": tasks_arg = function_args.get("tasks") if tasks_arg and isinstance(tasks_arg, list): - spinner_label = f"🔀 delegating {len(tasks_arg)} tasks" + spinner_label = f"🔀 delegating {len(tasks_arg)} tasks · (/agents to monitor)" else: goal_preview = (function_args.get("goal") or "")[:30] - spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating" + spinner_label = ( + f"🔀 {goal_preview} · (/agents to monitor)" + if goal_preview + else "🔀 delegating · (/agents to monitor)" + ) spinner = None if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner(): face = random.choice(KawaiiSpinner.get_waiting_faces()) diff --git a/cli.py b/cli.py index 95d14f7a996..9045e4790bf 100644 --- a/cli.py +++ b/cli.py @@ -787,8 +787,10 @@ def AIAgent(*args, **kwargs): def get_tool_definitions(*args, **kwargs): + from hermes_cli.mcp_startup import wait_for_mcp_discovery from model_tools import get_tool_definitions as _get_tool_definitions + wait_for_mcp_discovery() return _get_tool_definitions(*args, **kwargs) @@ -896,9 +898,12 @@ def _prepare_deferred_agent_startup() -> None: exc_info=True, ) try: - from tools.mcp_tool import discover_mcp_tools + from hermes_cli.mcp_startup import start_background_mcp_discovery - discover_mcp_tools() + start_background_mcp_discovery( + logger=logger, + thread_name="termux-cli-mcp-discovery", + ) except Exception: logger.debug( "MCP tool discovery failed at deferred CLI startup", @@ -1537,9 +1542,17 @@ def _query_osc11_background() -> str | None: Most modern terminals reply with \x1b]11;rgb:RRRR/GGGG/BBBB\x1b\\ within a few ms. We wait up to 100ms total before giving up. Returns "#RRGGBB" or None on timeout / non-tty. + + Skipped over SSH: the round-trip routinely exceeds our 100ms budget, so a + late reply lands after prompt_toolkit has grabbed the tty — its payload + leaks in as typed text and the BEL terminator reads as Ctrl+G (open + editor), trapping the user in a stray editor. Remote sessions fall back to + COLORFGBG / env hints / the dark default instead. """ if not sys.stdin.isatty() or not sys.stdout.isatty(): return None + if any(os.environ.get(v) for v in ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY")): + return None try: import termios import tty @@ -1587,8 +1600,11 @@ def _query_osc11_background() -> str | None: r, g, b = norm(m.group(1)), norm(m.group(2)), norm(m.group(3)) return f"#{r:02X}{g:02X}{b:02X}" finally: + # TCSAFLUSH discards any unread input as it restores the original + # attributes — scrubs a slow/partial OSC 11 reply out of the tty + # buffer before prompt_toolkit can read it as keystrokes. try: - termios.tcsetattr(fd, termios.TCSANOW, old) + termios.tcsetattr(fd, termios.TCSAFLUSH, old) except Exception: pass @@ -4872,6 +4888,10 @@ class HermesCLI: if not self._ensure_runtime_credentials(): return False + from hermes_cli.mcp_startup import wait_for_mcp_discovery + + wait_for_mcp_discovery() + # Initialize SQLite session store for CLI sessions (if not already done in __init__) if self._session_db is None: try: @@ -12928,6 +12948,13 @@ class HermesCLI: if event.app.is_running: event.app.exit() event.app.current_buffer.reset(append_to_history=True) + # Force a repaint: process_command() prints through + # patch_stdout (scrolls output above the prompt) and never + # invalidates the app, so the just-cleared input area can + # keep showing the submitted text until some unrelated + # redraw fires. Every other early-return branch in this + # handler invalidates after reset — match them. + event.app.invalidate() return # Handle /steer while the agent is running immediately on the @@ -12939,6 +12966,13 @@ class HermesCLI: if self._should_handle_steer_command_inline(text, has_images=has_images): self.process_command(text) event.app.current_buffer.reset(append_to_history=True) + # Force a repaint after clearing the buffer. /steer is + # dispatched mid-run while the agent streams output through + # patch_stdout; process_command() never invalidates the + # app, so without this the submitted "/steer " can + # linger in the input area (looking unsent) and invite an + # accidental re-submit. See issue #34569. + event.app.invalidate() return # Snapshot and clear attached images diff --git a/gateway/config.py b/gateway/config.py index c8e23f2bf38..59fdfa54ed6 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -474,6 +474,13 @@ class GatewayConfig: # Delivery settings always_log_local: bool = True # Always save cron outputs to local files + # Drop outbound "silence narration" messages (e.g. *(silent)*, 🔇, a bare + # ".") pre-send. These are model hallucinations emitted when a persona has + # nothing actionable to say; in bot-to-bot channels they mirror back and + # forth, burning tokens and crashing models. Substrate-level guard that + # survives SOUL.md/prompt drift across providers. Opt out with False for + # raw passthrough. + filter_silence_narration: bool = True # STT settings stt_enabled: bool = True # Whether to auto-transcribe inbound voice messages @@ -582,6 +589,7 @@ class GatewayConfig: "quick_commands": self.quick_commands, "sessions_dir": str(self.sessions_dir), "always_log_local": self.always_log_local, + "filter_silence_narration": self.filter_silence_narration, "stt_enabled": self.stt_enabled, "group_sessions_per_user": self.group_sessions_per_user, "thread_sessions_per_user": self.thread_sessions_per_user, @@ -650,6 +658,9 @@ class GatewayConfig: quick_commands=quick_commands, sessions_dir=sessions_dir, always_log_local=_coerce_bool(data.get("always_log_local"), True), + filter_silence_narration=_coerce_bool( + data.get("filter_silence_narration"), True + ), stt_enabled=_coerce_bool(stt_enabled, True), group_sessions_per_user=_coerce_bool(group_sessions_per_user, True), thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False), @@ -757,21 +768,32 @@ def load_gateway_config() -> GatewayConfig: if "always_log_local" in yaml_cfg: gw_data["always_log_local"] = yaml_cfg["always_log_local"] + if "filter_silence_narration" in yaml_cfg: + gw_data["filter_silence_narration"] = yaml_cfg[ + "filter_silence_narration" + ] + if "unauthorized_dm_behavior" in yaml_cfg: gw_data["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior( yaml_cfg.get("unauthorized_dm_behavior"), "pair", ) - # Merge platforms section from config.yaml into gw_data so that - # nested keys like platforms.webhook.extra.routes are loaded. - yaml_platforms = yaml_cfg.get("platforms") + # Merge platform config into gw_data so runtime-only settings under + # ``gateway.platforms`` are loaded the same way as top-level + # ``platforms``. Merge nested first so top-level config keeps + # precedence, matching the existing gateway.streaming fallback. + gateway_cfg = yaml_cfg.get("gateway") + gateway_platforms = gateway_cfg.get("platforms") if isinstance(gateway_cfg, dict) else None platforms_data = gw_data.setdefault("platforms", {}) if not isinstance(platforms_data, dict): platforms_data = {} gw_data["platforms"] = platforms_data - if isinstance(yaml_platforms, dict): - for plat_name, plat_block in yaml_platforms.items(): + + def _merge_platform_map(source_platforms: Any) -> None: + if not isinstance(source_platforms, dict): + return + for plat_name, plat_block in source_platforms.items(): if not isinstance(plat_block, dict): continue existing = platforms_data.get(plat_name, {}) @@ -785,6 +807,10 @@ def load_gateway_config() -> GatewayConfig: if merged_extra: merged["extra"] = merged_extra platforms_data[plat_name] = merged + + _merge_platform_map(gateway_platforms) + _merge_platform_map(yaml_cfg.get("platforms")) + if platforms_data: gw_data["platforms"] = platforms_data # Iterate built-in platforms plus any registered plugin platforms # so plugin authors get the same shared-key bridging (#24836). @@ -890,6 +916,18 @@ def load_gateway_config() -> GatewayConfig: if entry.apply_yaml_config_fn is None: continue platform_cfg = yaml_cfg.get(entry.name) + # Fall back to the platform's block under ``platforms`` / + # ``gateway.platforms`` so adapter hooks still run when the + # user configured the platform only under those nested paths + # (e.g. ``platforms.discord.extra.allow_from``) and not via a + # top-level ``discord:`` block. + if not isinstance(platform_cfg, dict): + for _src in (gateway_platforms, yaml_cfg.get("platforms")): + if isinstance(_src, dict): + _candidate = _src.get(entry.name) + if isinstance(_candidate, dict): + platform_cfg = _candidate + break if not isinstance(platform_cfg, dict): continue try: diff --git a/gateway/delivery.py b/gateway/delivery.py index a1cbb299384..8afab431c36 100644 --- a/gateway/delivery.py +++ b/gateway/delivery.py @@ -9,6 +9,8 @@ Routes messages to the appropriate destination based on: """ import logging +import os +import re from pathlib import Path from datetime import datetime from dataclasses import dataclass @@ -21,6 +23,32 @@ logger = logging.getLogger(__name__) MAX_PLATFORM_OUTPUT = 4000 TRUNCATED_VISIBLE = 3800 +# Matches strings that are *only* a "silence" narration with optional markdown +# wrappers. Covers: *(silent)*, _silent_, `silent`, ~silent~, (silent), silent, +# 🔇, a bare ".", "…", and the whitespace/marker-padded variants seen in the +# wild. Anchored to start/end so substantive messages that merely *contain* the +# word "silent" are never matched. +_SILENCE_NARRATION = re.compile( + r'^[\s*_~`]*\(?\s*(silent|silence|no\s+response|no\s+reply)\s*\.?\)?[\s*_~`]*$' + r'|^[\s*_~`]*[\U0001F507\.\u2026]+[\s*_~`]*$', + re.IGNORECASE, +) + + +def _is_silence_narration(content: Optional[str]) -> bool: + """Return True when ``content`` is *only* a silence-narration token. + + Length-guarded (real messages are longer) and anchored to the whole string + so legitimate prose like "The deployment ran silently" or "Silence is + golden — here is the plan..." is never flagged. + """ + if not content: + return False + stripped = content.strip() + if not stripped or len(stripped) > 64: # length guard + return False + return bool(_SILENCE_NARRATION.match(stripped)) + from .config import Platform, GatewayConfig from .session import SessionSource @@ -261,6 +289,18 @@ class DeliveryRouter: path.write_text(content) return path + def _filter_silence_narration_enabled(self) -> bool: + """Whether the outbound silence-narration filter is active. + + ``HERMES_FILTER_SILENCE_NARRATION`` env var overrides config when set; + otherwise the ``gateway.filter_silence_narration`` config flag wins + (default True). + """ + env = os.getenv("HERMES_FILTER_SILENCE_NARRATION") + if env is not None: + return env.strip().lower() in ("1", "true", "yes", "on") + return bool(getattr(self.config, "filter_silence_narration", True)) + async def _deliver_to_platform( self, target: DeliveryTarget, @@ -286,6 +326,27 @@ class DeliveryRouter: + f"\n\n... [truncated, full output saved to {saved_path}]" ) + # Substrate-level anti-loop guard: drop hallucinated "silence narration" + # (*(silent)*, 🔇, a bare ".", etc.) before it ever reaches the adapter. + # In bot-to-bot channels these tokens mirror back and forth until a + # model crashes with "no content after all retries". Behavioral prompt + # rules drift across providers; this single chokepoint covers every + # platform adapter regardless of which persona's prompt failed. + # Local/file delivery (_deliver_local) is a separate path and is never + # filtered — saved silence has no loop risk. + if self._filter_silence_narration_enabled() and _is_silence_narration(content): + logger.warning( + "Dropped silence-narration outbound to %s (chat=%s): %r", + target.platform.value, + target.chat_id, + content[:40], + ) + return { + "success": True, + "filtered": "silence_narration", + "delivered": False, + } + send_metadata = dict(metadata or {}) is_named_telegram_private_topic = False named_telegram_private_topic_name: Optional[str] = None diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 6979a869148..e1b677f12a1 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1191,10 +1191,12 @@ _MEDIA_EXT_ALTERNATION = "|".join( # bare-path detector (extract_local_files) downstream rather than silently # deleted. Shared by the non-streaming dispatch path and the streaming # consumer so both behave identically. +# Path anchors: ``~/`` (Unix home-relative), ``/`` (Unix absolute), +# ``X:\\`` or ``X:/`` (Windows drive-letter absolute — #34632). MEDIA_TAG_CLEANUP_RE = re.compile( r'''[`"']?MEDIA:\s*''' r'''(?P`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|''' - r'''(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:''' + _MEDIA_EXT_ALTERNATION + r'''))''' + r'''(?:~/|/|[A-Za-z]:[/\\])\S+(?:[^\S\n]+\S+)*?\.(?:''' + _MEDIA_EXT_ALTERNATION + r'''))''' r'''(?=[\s`"',;:)\]}]|$)[`"']?''', re.IGNORECASE, ) @@ -2665,9 +2667,10 @@ class BasePlatformAdapter(ABC): # (? (max 64 bytes) - buttons.append( - InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}") - ) - - rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)] - rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")]) - keyboard = InlineKeyboardMarkup(rows) + # Build provider buttons — folds provider groups (display only). + keyboard = self._build_provider_keyboard(providers) provider_label = get_label(current_provider) text = self.format_message( @@ -2865,6 +2852,56 @@ class TelegramAdapter(BasePlatformAdapter): _MODEL_PAGE_SIZE = 8 + def _build_provider_keyboard(self, providers: list): + """Build the top-level provider keyboard, folding provider groups. + + Provider families (Kimi/Moonshot, MiniMax, xAI Grok, ...) collapse to + a single ``mpg:`` button; tapping it drills into a member + sub-keyboard. Single providers (and groups with only one authenticated + member) render as direct ``mp:`` buttons. Grouping mirrors the + CLI ``hermes model`` picker via the shared ``group_providers`` fold, + so all surfaces stay consistent. + """ + try: + from hermes_cli.models import group_providers + except Exception: + group_providers = None + + by_slug = {p.get("slug"): p for p in providers} + + def _provider_button(p): + count = p.get("total_models", len(p.get("models", []))) + label = f"{p['name']} ({count})" + if p.get("is_current"): + label = f"✓ {label}" + return InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}") + + buttons: list = [] + if group_providers is not None: + for row in group_providers([p.get("slug") for p in providers]): + if row["kind"] == "group": + members = [by_slug[m] for m in row["members"] if m in by_slug] + count = sum( + m.get("total_models", len(m.get("models", []))) for m in members + ) + label = f"{row['label']} ▸ ({count})" + if any(m.get("is_current") for m in members): + label = f"✓ {label}" + buttons.append( + InlineKeyboardButton(label, callback_data=f"mpg:{row['group_id']}") + ) + else: + p = by_slug.get(row["slug"]) + if p is not None: + buttons.append(_provider_button(p)) + else: + for p in providers: + buttons.append(_provider_button(p)) + + rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)] + rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")]) + return InlineKeyboardMarkup(rows) + def _build_model_keyboard(self, models: list, page: int) -> tuple: """Build paginated model buttons. Returns (keyboard, page_info_text).""" page_size = self._MODEL_PAGE_SIZE @@ -3043,10 +3080,23 @@ class TelegramAdapter(BasePlatformAdapter): # Clean up state self._model_picker_state.pop(chat_id, None) - elif data == "mb": - # --- Back to provider list --- + elif data.startswith("mpg:"): + # --- Provider group selected: show member providers --- + group_id = data[4:] + try: + from hermes_cli.models import PROVIDER_GROUPS + _label, member_slugs = PROVIDER_GROUPS.get(group_id, ("", [])) + except Exception: + _label, member_slugs = "", [] + + by_slug = {p["slug"]: p for p in state["providers"]} + members = [by_slug[m] for m in member_slugs if m in by_slug] + if not members: + await query.answer(text="Group not found.") + return + buttons = [] - for p in state["providers"]: + for p in members: count = p.get("total_models", len(p.get("models", []))) label = f"{p['name']} ({count})" if p.get("is_current"): @@ -3054,11 +3104,30 @@ class TelegramAdapter(BasePlatformAdapter): buttons.append( InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}") ) - rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)] - rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")]) + rows.append([ + InlineKeyboardButton("◀ Back", callback_data="mb"), + InlineKeyboardButton("✗ Cancel", callback_data="mx"), + ]) keyboard = InlineKeyboardMarkup(rows) + await query.edit_message_text( + text=self.format_message( + ( + f"⚙ *Model Configuration*\n\n" + f"Provider family: *{_label or group_id}*\n\n" + f"Select a provider:" + ) + ), + parse_mode=ParseMode.MARKDOWN_V2, + reply_markup=keyboard, + ) + await query.answer() + + elif data == "mb": + # --- Back to provider list (folds groups) --- + keyboard = self._build_provider_keyboard(state["providers"]) + try: provider_label = get_label(state["current_provider"]) except Exception: @@ -3107,7 +3176,7 @@ class TelegramAdapter(BasePlatformAdapter): query_user_name = getattr(query.from_user, "first_name", None) # --- Model picker callbacks --- - if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")): + if data.startswith(("mp:", "mpg:", "mm:", "mb", "mx", "mg:")): chat_id = str(query.message.chat_id) if query.message else None if chat_id: await self._handle_model_picker_callback(query, data, chat_id) diff --git a/gateway/platforms/weixin.py b/gateway/platforms/weixin.py index 025bf052cce..36bb3dd21c2 100644 --- a/gateway/platforms/weixin.py +++ b/gateway/platforms/weixin.py @@ -1180,12 +1180,48 @@ class WeixinAdapter(BasePlatformAdapter): default=False, ) + # Text debounce batching (mirrors Telegram adapter pattern). + # iLink delivers messages individually, so rapid multi-message + # bursts (forwarded batches, paste-splits) each trigger a + # separate agent invocation. Default 3s delay / 5s split delay + # are tuned for iLink's typical delivery cadence. Tunable via + # config.yaml under + # ``gateway.platforms.weixin.extra.text_batch_delay_seconds`` / + # ``text_batch_split_delay_seconds``. + self._text_batch_delay_seconds = self._coerce_float_extra( + "text_batch_delay_seconds", 3.0 + ) + self._text_batch_split_delay_seconds = self._coerce_float_extra( + "text_batch_split_delay_seconds", 5.0 + ) + self._pending_text_batches: Dict[str, MessageEvent] = {} + self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {} + if self._account_id and not self._token: persisted = load_weixin_account(hermes_home, self._account_id) if persisted: self._token = str(persisted.get("token") or "").strip() self._base_url = str(persisted.get("base_url") or self._base_url).strip().rstrip("/") + def _coerce_float_extra(self, key: str, default: float) -> float: + """Read a float from ``config.extra``, guarding against bad/non-finite values. + + The result is fed directly to ``asyncio.sleep()``, so NaN/Inf and + unparseable values fall back to ``default``. + """ + import math + + value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None + if value is None: + return float(default) + try: + parsed = float(value) + except (TypeError, ValueError): + return float(default) + if not math.isfinite(parsed) or parsed < 0: + return float(default) + return parsed + @staticmethod def _coerce_list(value: Any) -> List[str]: if value is None: @@ -1247,6 +1283,11 @@ class WeixinAdapter(BasePlatformAdapter): async def disconnect(self) -> None: _LIVE_ADAPTERS.pop(self._token, None) self._running = False + for task in self._pending_text_batch_tasks.values(): + if not task.done(): + task.cancel() + self._pending_text_batches.clear() + self._pending_text_batch_tasks.clear() if self._poll_task and not self._poll_task.done(): self._poll_task.cancel() try: @@ -1395,12 +1436,10 @@ class WeixinAdapter(BasePlatformAdapter): timestamp=datetime.now(), ) logger.info("[%s] inbound from=%s type=%s media=%d", self.name, _safe_id(sender_id), source.chat_type, len(media_paths)) - await self.handle_message(event) - - @property - def enforces_own_access_policy(self) -> bool: - """Weixin gates DM/group access at intake via dm_policy/group_policy.""" - return True + if event.message_type == MessageType.TEXT: + self._enqueue_text_event(event) + else: + await self.handle_message(event) def _is_dm_allowed(self, sender_id: str) -> bool: if self._dm_policy == "disabled": @@ -1409,6 +1448,76 @@ class WeixinAdapter(BasePlatformAdapter): return sender_id in self._allow_from return True + @property + def enforces_own_access_policy(self) -> bool: + """Weixin gates DM/group access at intake via dm_policy/group_policy.""" + return True + + # ------------------------------------------------------------------ + # Text debounce batching + # ------------------------------------------------------------------ + + _SPLIT_THRESHOLD = 1800 # iLink chunks at ~2048 chars + + def _text_batch_key(self, event: MessageEvent) -> str: + """Session-scoped key for text message batching.""" + from gateway.session import build_session_key + return build_session_key( + event.source, + group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True), + thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False), + ) + + def _enqueue_text_event(self, event: MessageEvent) -> None: + """Buffer a text event and reset the flush timer. + + When users forward multiple messages or send rapid-fire texts + via WeChat, each arrives as a separate iLink message. This + concatenates them and waits for a short quiet period before + dispatching the combined message. + """ + key = self._text_batch_key(event) + existing = self._pending_text_batches.get(key) + chunk_len = len(event.text or "") + if existing is None: + event._last_chunk_len = chunk_len # type: ignore[attr-defined] + self._pending_text_batches[key] = event + else: + if event.text: + existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text + existing._last_chunk_len = chunk_len # type: ignore[attr-defined] + if event.media_urls: + existing.media_urls.extend(event.media_urls) + existing.media_types.extend(event.media_types) + + prior_task = self._pending_text_batch_tasks.get(key) + if prior_task and not prior_task.done(): + prior_task.cancel() + self._pending_text_batch_tasks[key] = asyncio.create_task( + self._flush_text_batch(key) + ) + + async def _flush_text_batch(self, key: str) -> None: + """Wait for quiet period then dispatch aggregated text.""" + current_task = asyncio.current_task() + try: + pending = self._pending_text_batches.get(key) + last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0 + if last_len >= self._SPLIT_THRESHOLD: + delay = self._text_batch_split_delay_seconds + else: + delay = self._text_batch_delay_seconds + await asyncio.sleep(delay) + if self._pending_text_batch_tasks.get(key) is not current_task: + return + event = self._pending_text_batches.pop(key, None) + if not event: + return + await self.handle_message(event) + finally: + if self._pending_text_batch_tasks.get(key) is current_task: + self._pending_text_batch_tasks.pop(key, None) + async def _collect_media(self, item: Dict[str, Any], media_paths: List[str], media_types: List[str]) -> None: item_type = item.get("type") if item_type == ITEM_IMAGE: diff --git a/gateway/platforms/whatsapp.py b/gateway/platforms/whatsapp.py index 0ca3d41fabb..703f774323f 100644 --- a/gateway/platforms/whatsapp.py +++ b/gateway/platforms/whatsapp.py @@ -278,6 +278,43 @@ class WhatsAppAdapter(BasePlatformAdapter): # notification before the normal "✓ whatsapp disconnected" fires. self._shutting_down: bool = False + # Text debounce batching (mirrors Telegram adapter pattern). + # WhatsApp often delivers multiple messages in rapid succession + # (e.g. forwarded batches, paste-splits) — without debounce each + # message triggers a separate agent invocation, wasting tokens and + # flooding the user with reply fragments. Default 5s delay / + # 10s split delay are conservative for WhatsApp's delivery cadence. + # Tunable via config.yaml under + # ``gateway.platforms.whatsapp.extra.text_batch_delay_seconds`` / + # ``text_batch_split_delay_seconds``. + self._text_batch_delay_seconds = self._coerce_float_extra( + "text_batch_delay_seconds", 5.0 + ) + self._text_batch_split_delay_seconds = self._coerce_float_extra( + "text_batch_split_delay_seconds", 10.0 + ) + self._pending_text_batches: Dict[str, MessageEvent] = {} + self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {} + + def _coerce_float_extra(self, key: str, default: float) -> float: + """Read a float from ``config.extra``, guarding against bad/non-finite values. + + The result is fed directly to ``asyncio.sleep()``, so NaN/Inf and + unparseable values fall back to ``default``. + """ + import math + + value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None + if value is None: + return float(default) + try: + parsed = float(value) + except (TypeError, ValueError): + return float(default) + if not math.isfinite(parsed) or parsed < 0: + return float(default) + return parsed + def _effective_reply_prefix(self) -> str: """Return the prefix the Node bridge will add in self-chat mode.""" whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat") @@ -1139,7 +1176,10 @@ class WhatsAppAdapter(BasePlatformAdapter): for msg_data in messages: event = await self._build_message_event(msg_data) if event: - await self.handle_message(event) + if event.message_type == MessageType.TEXT: + self._enqueue_text_event(event) + else: + await self.handle_message(event) except asyncio.CancelledError: break except Exception as e: @@ -1151,7 +1191,67 @@ class WhatsAppAdapter(BasePlatformAdapter): await asyncio.sleep(5) await asyncio.sleep(1) # Poll interval - + + # ── Text debounce batching ────────────────────────────────────── + + _SPLIT_THRESHOLD = 6000 # WhatsApp supports ~65K chars; generous threshold + + def _text_batch_key(self, event: MessageEvent) -> str: + """Session-scoped key for text message batching.""" + from gateway.session import build_session_key + return build_session_key( + event.source, + group_sessions_per_user=self.config.extra.get("group_sessions_per_user", True), + thread_sessions_per_user=self.config.extra.get("thread_sessions_per_user", False), + ) + + def _enqueue_text_event(self, event: MessageEvent) -> None: + """Buffer a text event and reset the flush timer. + + When WhatsApp delivers rapid-fire messages (e.g. forwarded + batches), this concatenates them and waits for a short quiet + period before dispatching the combined message. + """ + key = self._text_batch_key(event) + existing = self._pending_text_batches.get(key) + chunk_len = len(event.text or "") + if existing is None: + event._last_chunk_len = chunk_len # type: ignore[attr-defined] + self._pending_text_batches[key] = event + else: + if event.text: + existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text + existing._last_chunk_len = chunk_len # type: ignore[attr-defined] + if event.media_urls: + existing.media_urls.extend(event.media_urls) + existing.media_types.extend(event.media_types) + + prior_task = self._pending_text_batch_tasks.get(key) + if prior_task and not prior_task.done(): + prior_task.cancel() + self._pending_text_batch_tasks[key] = asyncio.create_task( + self._flush_text_batch(key) + ) + + async def _flush_text_batch(self, key: str) -> None: + """Wait for quiet period then dispatch aggregated text.""" + current_task = asyncio.current_task() + try: + pending = self._pending_text_batches.get(key) + last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0 + if last_len >= self._SPLIT_THRESHOLD: + delay = self._text_batch_split_delay_seconds + else: + delay = self._text_batch_delay_seconds + await asyncio.sleep(delay) + event = self._pending_text_batches.pop(key, None) + if not event: + return + await self.handle_message(event) + finally: + if self._pending_text_batch_tasks.get(key) is current_task: + self._pending_text_batch_tasks.pop(key, None) + async def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]: """Build a MessageEvent from bridge message data, downloading images to cache.""" try: diff --git a/gateway/run.py b/gateway/run.py index 570ccf7e31b..6e0c6cb33f2 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -1730,6 +1730,14 @@ class GatewayRunner: self._running_agents: Dict[str, Any] = {} self._running_agents_ts: Dict[str, float] = {} # start timestamp per session self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt + # Last successfully-resolved (non-empty) model, keyed by session. Used + # as a fallback when a fresh config read transiently returns an empty + # model (e.g. an mtime-keyed config-cache miss during a post-interrupt + # recovery turn). Without this, the agent is built with model="" and + # every API call fails HTTP 400 "No models provided" — the session goes + # silent until the user manually re-sends. See #35314. ``"*"`` holds a + # process-wide last-known-good for sessions seen for the first time. + self._last_resolved_model: Dict[str, str] = {} # Overflow buffer for explicit /queue commands. The adapter-level # _pending_messages dict is a single slot per session (designed for # "next-turn" follow-ups where repeated sends collapse into one @@ -2488,6 +2496,32 @@ class GatewayRunner: except Exception: pass + # Final safety net (#35314): if resolution still produced an empty + # model — e.g. a transient config-cache miss during a post-interrupt + # recovery turn returned an empty user_config — reuse the last model we + # successfully resolved for this session (or, failing that, the most + # recent one resolved process-wide). Building an agent with model="" + # makes every API call fail HTTP 400 "No models provided" and the + # session goes silent until the user manually re-sends. ``getattr`` + # guards against bare test runners built via ``object.__new__``. + _last_good = getattr(self, "_last_resolved_model", None) + if _last_good is not None: + if not model: + _recovered = _last_good.get(resolved_session_key or "") or _last_good.get("*") + if _recovered: + logger.warning( + "Empty model resolved for session=%s — recovering " + "last-known-good model %s (config read likely returned " + "empty; see #35314)", + resolved_session_key or "", _recovered, + ) + model = _recovered + elif model: + # Cache the good resolution for future recovery turns. + if resolved_session_key: + _last_good[resolved_session_key] = model + _last_good["*"] = model + return model, runtime_kwargs def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict: @@ -2784,10 +2818,12 @@ class GatewayRunner: """Mark a queued platform as paused — keep it in ``_failed_platforms`` but stop the reconnect watcher from hammering it. - Used by the circuit breaker after ``_PAUSE_AFTER_FAILURES`` consecutive - retryable failures, and by ``/platform pause `` for manual - intervention. Paused platforms are surfaced in ``/platform list`` - and resumed with ``/platform resume ``. + Used by ``/platform pause `` for manual operator intervention. + Paused platforms are surfaced in ``/platform list`` and resumed with + ``/platform resume ``. Note: the reconnect watcher does NOT + auto-pause — retryable (network/DNS) failures keep retrying at the + backoff cap indefinitely so a transient outage self-heals without + manual intervention. """ info = getattr(self, "_failed_platforms", {}).get(platform) if info is None: @@ -5865,15 +5901,17 @@ class GatewayRunner: """Background task that periodically retries connecting failed platforms. Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap). - Retryable failures keep retrying at the backoff cap indefinitely - — but if a platform fails ``_PAUSE_AFTER_FAILURES`` times in a row - without ever succeeding, it is *paused*: kept in the retry queue - but no longer hammered. The user surfaces it with ``/platform list`` - and resumes it with ``/platform resume ``. Non-retryable - failures (bad auth, etc.) still drop out of the queue immediately. + Retryable failures (network/DNS blips) keep retrying at the backoff + cap indefinitely — they self-heal once connectivity returns, so a + transient outage never requires manual intervention. Non-retryable + failures (bad auth, etc.) drop out of the queue immediately. The + circuit breaker (``_pause_failed_platform`` / ``/platform pause``) + remains available for manual operator control via ``/platform list`` + and ``/platform resume ``, but is no longer triggered + automatically — auto-pausing a recovered platform was the cause of + bots silently staying dead after a transient DNS failure. """ _BACKOFF_CAP = 300 # 5 minutes max between retries - _PAUSE_AFTER_FAILURES = 10 # circuit-breaker threshold await asyncio.sleep(10) # initial delay — let startup finish while self._running: @@ -5968,14 +6006,14 @@ class GatewayRunner: "Reconnect %s failed, next retry in %ds", platform.value, backoff, ) - if attempt >= _PAUSE_AFTER_FAILURES: - self._pause_failed_platform( - platform, - reason=( - adapter.fatal_error_message - or "failed to reconnect" - ), - ) + # Retryable failures (network/DNS blips) keep retrying + # at the backoff cap indefinitely — they self-heal once + # connectivity returns. We do NOT auto-pause them: a + # transient outage must never require manual `/platform + # resume` to recover. Non-retryable failures (bad auth, + # etc.) already drop out of the queue via the + # `not fatal_error_retryable` branch above, so anything + # reaching here is by definition retryable. except Exception as e: self._update_platform_runtime_status( platform.value, @@ -5990,8 +6028,9 @@ class GatewayRunner: "Reconnect %s error: %s, next retry in %ds", platform.value, e, backoff, ) - if attempt >= _PAUSE_AFTER_FAILURES: - self._pause_failed_platform(platform, reason=str(e)) + # A raised exception during reconnect (connect timeout, DNS + # resolution failure, etc.) is inherently transient — keep + # retrying at the backoff cap rather than auto-pausing. # Check every 10 seconds for platforms that need reconnection for _ in range(10): @@ -10531,6 +10570,22 @@ class GatewayRunner: except Exception as exc: logger.warning("Picker model switch failed for cached agent: %s", exc) + # Persist the new model to the session DB so the + # dashboard shows the updated model (#34850). + _sess_db = getattr(_self, "_session_db", None) + if _sess_db is not None: + try: + _sess_entry = _self.session_store.get_or_create_session( + event.source + ) + _sess_db.update_session_model( + _sess_entry.session_id, result.new_model + ) + except Exception as exc: + logger.debug( + "Failed to persist model switch to DB: %s", exc + ) + # Store model note + session override if not hasattr(_self, "_pending_model_notes"): _self._pending_model_notes = {} @@ -10668,6 +10723,20 @@ class GatewayRunner: except Exception as exc: logger.warning("In-place model switch failed for cached agent: %s", exc) + # Persist the new model to the session DB so the dashboard + # shows the updated model (#34850). + _sess_db = getattr(self, "_session_db", None) + if _sess_db is not None: + try: + _sess_entry = self.session_store.get_or_create_session(source) + _sess_db.update_session_model( + _sess_entry.session_id, result.new_model + ) + except Exception as exc: + logger.debug( + "Failed to persist model switch to DB: %s", exc + ) + # Store a note to prepend to the next user message so the model # knows about the switch (avoids system messages mid-history). if not hasattr(self, "_pending_model_notes"): @@ -15313,8 +15382,52 @@ class GatewayRunner: ("compression", "target_ratio"), ("compression", "protect_last_n"), ("agent", "disabled_toolsets"), + ("memory", "provider"), ) + _HONCHO_CACHE_BUSTING_KEYS = ( + "honcho.peer_name", + "honcho.ai_peer", + "honcho.pin_peer_name", + "honcho.runtime_peer_prefix", + "honcho.user_peer_aliases", + ) + _HONCHO_CACHE_BUSTING_MEMO: dict[tuple[str, int | None], dict[str, Any]] = {} + + @classmethod + def _empty_honcho_cache_busting_config(cls) -> dict[str, Any]: + return {key: None for key in cls._HONCHO_CACHE_BUSTING_KEYS} + + @classmethod + def _extract_honcho_cache_busting_config(cls) -> dict[str, Any]: + """Extract Honcho identity keys, memoized by honcho.json mtime.""" + try: + from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path + + path = resolve_config_path() + try: + mtime_ns = path.stat().st_mtime_ns + except OSError: + mtime_ns = None + memo_key = (str(path), mtime_ns) + cached = cls._HONCHO_CACHE_BUSTING_MEMO.get(memo_key) + if cached is not None: + return dict(cached) + + hcfg = HonchoClientConfig.from_global_config(config_path=path) + aliases = hcfg.user_peer_aliases or {} + values = { + "honcho.peer_name": hcfg.peer_name, + "honcho.ai_peer": hcfg.ai_peer, + "honcho.pin_peer_name": bool(hcfg.pin_peer_name), + "honcho.runtime_peer_prefix": hcfg.runtime_peer_prefix or "", + "honcho.user_peer_aliases": sorted(aliases.items()) if isinstance(aliases, dict) else [], + } + cls._HONCHO_CACHE_BUSTING_MEMO = {memo_key: values} + return dict(values) + except Exception: + return cls._empty_honcho_cache_busting_config() + @classmethod def _extract_cache_busting_config(cls, user_config: dict | None) -> dict: """Pull values that must bust the cached agent. @@ -15345,26 +15458,12 @@ class GatewayRunner: out["tools.registry_generation"] = None # Honcho identity-mapping keys live in honcho.json, not user_config. - # HonchoSessionManager freezes the resolved peer_name / ai_peer / - # pin / aliases / prefix at construction; without busting here, - # mid-flight honcho.json edits go unread until the next unrelated - # cache eviction. - try: - from plugins.memory.honcho.client import HonchoClientConfig - - hcfg = HonchoClientConfig.from_global_config() - out["honcho.peer_name"] = hcfg.peer_name - out["honcho.ai_peer"] = hcfg.ai_peer - out["honcho.pin_peer_name"] = bool(hcfg.pin_peer_name) - out["honcho.runtime_peer_prefix"] = hcfg.runtime_peer_prefix or "" - aliases = hcfg.user_peer_aliases or {} - out["honcho.user_peer_aliases"] = sorted(aliases.items()) if isinstance(aliases, dict) else [] - except Exception: - out["honcho.peer_name"] = None - out["honcho.ai_peer"] = None - out["honcho.pin_peer_name"] = None - out["honcho.runtime_peer_prefix"] = None - out["honcho.user_peer_aliases"] = None + # Only read that file when Honcho is the active memory provider. + provider = cfg_get(cfg, "memory", "provider") + if isinstance(provider, str) and provider.lower() == "honcho": + out.update(cls._extract_honcho_cache_busting_config()) + else: + out.update(cls._empty_honcho_cache_busting_config()) return out @@ -17203,7 +17302,7 @@ class GatewayRunner: _hc = _hm.get("content", "") if "MEDIA:" in _hc: _TOOL_MEDIA_RE = re.compile( - r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|' + r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|' r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|' r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|' r'txt|csv|apk|ipa))', @@ -17529,7 +17628,7 @@ class GatewayRunner: content = msg.get("content", "") if "MEDIA:" in content: _TOOL_MEDIA_RE = re.compile( - r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|' + r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|' r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|' r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|' r'txt|csv|apk|ipa))', diff --git a/hermes_cli/banner.py b/hermes_cli/banner.py index c91b2f728c2..f25d03d2a87 100644 --- a/hermes_cli/banner.py +++ b/hermes_cli/banner.py @@ -12,14 +12,16 @@ import threading import time from pathlib import Path from hermes_constants import get_hermes_home -from typing import Dict, List, Optional +from typing import TYPE_CHECKING, Dict, List, Optional -from rich.console import Console -from rich.panel import Panel -from rich.table import Table - -from prompt_toolkit import print_formatted_text as _pt_print -from prompt_toolkit.formatted_text import ANSI as _PT_ANSI +# rich and prompt_toolkit are imported lazily (inside the functions that use +# them) rather than at module level. Importing this module is on the TUI +# gateway's critical startup path purely to reach the lightweight update-check +# helpers (``prefetch_update_check``); pulling rich.console + prompt_toolkit +# eagerly added ~50ms of wasted imports before ``gateway.ready`` could fire. +# Keep the type-only reference available to checkers without the runtime cost. +if TYPE_CHECKING: + from rich.console import Console logger = logging.getLogger(__name__) @@ -36,6 +38,8 @@ _RST = "\033[0m" def cprint(text: str): """Print ANSI-colored text through prompt_toolkit's renderer.""" + from prompt_toolkit import print_formatted_text as _pt_print + from prompt_toolkit.formatted_text import ANSI as _PT_ANSI _pt_print(_PT_ANSI(text)) @@ -471,7 +475,7 @@ def _display_toolset_name(toolset_name: str) -> str: ) -def build_welcome_banner(console: Console, model: str, cwd: str, +def build_welcome_banner(console: "Console", model: str, cwd: str, tools: List[dict] = None, enabled_toolsets: List[str] = None, session_id: str = None, @@ -490,6 +494,8 @@ def build_welcome_banner(console: Console, model: str, cwd: str, context_length: Model's context window size in tokens. """ from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS + from rich.panel import Panel + from rich.table import Table if get_toolset_for_tool is None: from model_tools import get_toolset_for_tool diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 55b76b58850..6e643338a98 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -286,9 +286,22 @@ def detect_install_method(project_root: Optional[Path] = None) -> str: Resolution order: 1. Stamped ``~/.hermes/.install_method`` file (written by installers) 2. HERMES_MANAGED env / .managed marker (NixOS, Homebrew) - 3. Container detection (/.dockerenv, /run/.containerenv, cgroup) - 4. .git directory presence -> 'git' - 5. Fallback -> 'pip' + 3. .git directory presence -> 'git' + 4. Fallback -> 'pip' + + Note: running inside a container is NOT treated as "docker" on its own. + The two supported install paths both self-identify via the + ``.install_method`` stamp (caught by step 1), so neither relies on + container detection here: + - the curl installer (scripts/install.sh, the README/website install + command) git-clones the repo and stamps ``git``; + - the published ``nousresearch/hermes-agent`` image stamps ``docker`` + at boot via ``docker/stage2-hook.sh``. + An unsupported manual install dropped into a container (no stamp) was + wrongly classified as the published image by bare container detection, + so ``hermes update`` bailed with "doesn't apply inside the Docker + container". Without that fallback such installs fall through to the + ``.git``/pip checks and behave like any off-path install. See issue #34397. """ stamp = get_hermes_home() / ".install_method" try: @@ -300,9 +313,6 @@ def detect_install_method(project_root: Optional[Path] = None) -> str: managed = get_managed_system() if managed: return managed.lower().replace(" ", "-") - from hermes_constants import is_container - if is_container(): - return "docker" if project_root is None: project_root = Path(__file__).parent.parent.resolve() if (project_root / ".git").is_dir(): @@ -320,6 +330,34 @@ def stamp_install_method(method: str) -> None: pass +def is_uv_tool_install() -> bool: + """Return True when the *running* Hermes lives in a ``uv tool`` layout. + + ``uv tool install hermes-agent`` places the install at + ``.../uv/tools/hermes-agent/...`` (default ``~/.local/share/uv/tools``, + or ``$UV_TOOL_DIR/...``). Such installs live outside any virtualenv, so + ``uv pip install`` fails with ``No virtual environment found`` and the + update path must use ``uv tool upgrade`` instead. + + Detection is intentionally restricted to properties of the running + interpreter (``sys.prefix`` / ``sys.executable``). We deliberately do + NOT consult ``uv tool list``: it would also return True when + ``hermes-agent`` happens to be uv-tool-installed on the machine while + the *active* Hermes is a regular pip/venv install, causing + ``hermes update`` to upgrade the wrong copy. It would also block on a + subprocess call (~seconds) just to compute a recommendation string. + """ + def _has_uv_tool_marker(path: str) -> bool: + norm = os.path.normpath(path).replace(os.sep, "/").lower() + return "/uv/tools/hermes-agent/" in norm + "/" + + if _has_uv_tool_marker(sys.prefix): + return True + if _has_uv_tool_marker(sys.executable or ""): + return True + return False + + def recommended_update_command_for_method(method: str) -> str: """Return the update command or guidance for a given install method.""" if method == "nixos": @@ -329,9 +367,10 @@ def recommended_update_command_for_method(method: str) -> str: if method == "docker": return "docker pull nousresearch/hermes-agent:latest" if method == "pip": + if is_uv_tool_install(): + return "uv tool upgrade hermes-agent" import shutil - uv = shutil.which("uv") - if uv: + if shutil.which("uv"): return "uv pip install --upgrade hermes-agent" return "pip install --upgrade hermes-agent" return "hermes update" @@ -1184,6 +1223,11 @@ DEFAULT_CONFIG = { # Mirrors `hermes -c` muscle memory. Default off so existing # users aren't surprised. HERMES_TUI_RESUME= always wins. "tui_auto_resume_recent": False, + # When true (default), `hermes --tui` drops a one-time hint + # ("subagents working · /agents to watch live") the first time a turn + # starts delegating, nudging the user toward the live spawn-tree + # dashboard. Set false to suppress the hint. + "tui_agents_nudge": True, "bell_on_complete": False, "show_reasoning": False, "streaming": False, @@ -1203,6 +1247,13 @@ DEFAULT_CONFIG = { # class of over-claim that otherwise forces users to run # `git status` to verify edits landed. Set false to suppress. "file_mutation_verifier": True, + # Turn-completion explainer. When true (default), the agent appends a + # one-line explanation to its final response whenever a turn ends + # abnormally with no usable reply — empty content after retries, a + # partial/truncated stream, a still-pending tool result, or an + # iteration/budget limit. Replaces the bare "(empty)" sentinel so the + # failure isn't silent from the UI's perspective. Set false to suppress. + "turn_completion_explainer": True, "show_cost": False, # Show $ cost in the status bar (off by default) "skin": "default", # UI language for static user-facing messages (approval prompts, a diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 3db70beaa72..4971f1faece 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -204,6 +204,60 @@ def _fail_and_issue(text: str, detail: str, fix: str, issues: list[str]) -> None issues.append(fix) +def _read_pyproject_version() -> str | None: + """Read the ``version = "..."`` from ``pyproject.toml`` at the project root. + + Returns None when running from an installed wheel (no pyproject.toml ships + with the package) or when the file can't be parsed. Reads only the + ``[project]`` version, ignoring any version strings that appear in other + tables. + """ + pyproject = PROJECT_ROOT / "pyproject.toml" + try: + text = pyproject.read_text(encoding="utf-8") + except OSError: + return None + in_project = False + for raw in text.splitlines(): + line = raw.strip() + if line.startswith("[") and line.endswith("]"): + in_project = line == "[project]" + continue + if in_project and line.startswith("version") and "=" in line: + value = line.split("=", 1)[1] + value = value.split("#", 1)[0].strip().strip("\"'") + return value or None + return None + + +def _check_version_consistency(issues: list[str]) -> None: + """Verify pyproject.toml version matches hermes_cli.__version__. + + A git conflict resolution (reset/merge) can revert one file without the + other, leaving ``hermes --version`` reporting a stale version while + ``pyproject.toml`` is current. Detect that drift so users can re-sync. + Silent no-op for installed wheels where pyproject.toml isn't present. + """ + try: + from hermes_cli import __version__ as init_version + except Exception: + return + pyproject_version = _read_pyproject_version() + if pyproject_version is None: + # Installed wheel or unreadable pyproject — nothing to cross-check. + return + if pyproject_version == init_version: + check_ok("Version files consistent", f"({init_version})") + else: + _fail_and_issue( + "Version mismatch between source files", + f"(pyproject.toml {pyproject_version} != hermes_cli/__init__.py {init_version})", + "Re-sync version files (e.g. run 'hermes update', or set " + "hermes_cli/__init__.py __version__ to match pyproject.toml)", + issues, + ) + + def _check_s6_supervision(issues: list[str]) -> None: """Inside a container under our s6 /init, surface what s6 sees. @@ -509,6 +563,10 @@ def run_doctor(args): check_ok("Virtual environment active") else: check_warn("Not in virtual environment", "(recommended)") + + # Detect drift between pyproject.toml and hermes_cli/__init__.py versions + # (a git conflict resolution can silently revert one but not the other). + _check_version_consistency(issues) _section("Required Packages") required_packages = [ diff --git a/hermes_cli/kanban_db.py b/hermes_cli/kanban_db.py index 5e465e87a6f..4711655249d 100644 --- a/hermes_cli/kanban_db.py +++ b/hermes_cli/kanban_db.py @@ -396,6 +396,41 @@ def workspaces_root(board: Optional[str] = None) -> Path: return board_dir(slug) / "workspaces" +def attachments_root(board: Optional[str] = None) -> Path: + """Return the directory under which task file attachments are stored. + + Mirrors :func:`worker_logs_dir` / :func:`workspaces_root`: anchored + per-board so attachments don't leak between projects. Each task gets + its own ``/.../attachments//`` subdirectory. + + ``HERMES_KANBAN_ATTACHMENTS_ROOT`` pins the path directly (highest + precedence) for tests and unusual deployments. + + ``default`` uses ``/kanban/attachments/``; other boards use + ``/kanban/boards//attachments/``. + + Workers (which run with full file-tool access) read attached files + by the absolute path surfaced in :func:`build_worker_context`. On the + local terminal backend — the default for kanban — that path resolves + directly. Remote backends (Docker/Modal) need this directory mounted; + see the kanban docs. + """ + override = os.environ.get("HERMES_KANBAN_ATTACHMENTS_ROOT", "").strip() + if override: + return Path(override).expanduser() + slug = _normalize_board_slug(board) + if slug is None: + slug = get_current_board() + if slug == DEFAULT_BOARD: + return kanban_home() / "kanban" / "attachments" + return board_dir(slug) / "attachments" + + +def task_attachments_dir(task_id: str, board: Optional[str] = None) -> Path: + """Return the per-task attachment directory ``//``.""" + return attachments_root(board=board) / task_id + + def worker_logs_dir(board: Optional[str] = None) -> Path: """Return the directory under which per-task worker logs are written. @@ -831,6 +866,20 @@ class Comment: created_at: int +@dataclass +class Attachment: + """In-memory view of a row from the ``task_attachments`` table.""" + + id: int + task_id: str + filename: str + stored_path: str + content_type: Optional[str] + size: int + uploaded_by: Optional[str] + created_at: int + + @dataclass class Event: id: int @@ -957,6 +1006,23 @@ CREATE TABLE IF NOT EXISTS task_runs ( error TEXT ); +-- Files attached to a task (PDFs, images, source documents). The blob +-- lives on disk under ``attachments_root(board)//``; +-- this row carries metadata + the absolute ``stored_path`` so the +-- dashboard can list/download and ``build_worker_context`` can surface +-- the absolute path to the worker (which has full file-tool access). See +-- #35338. +CREATE TABLE IF NOT EXISTS task_attachments ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + task_id TEXT NOT NULL, + filename TEXT NOT NULL, + stored_path TEXT NOT NULL, + content_type TEXT, + size INTEGER NOT NULL DEFAULT 0, + uploaded_by TEXT, + created_at INTEGER NOT NULL +); + -- Subscription from a gateway source (platform + chat + thread) to a -- task. The gateway's kanban-notifier watcher tails task_events and -- pushes ``completed`` / ``blocked`` / ``spawn_auto_blocked`` events to @@ -981,6 +1047,7 @@ CREATE INDEX IF NOT EXISTS idx_comments_task ON task_comments(task_id, c CREATE INDEX IF NOT EXISTS idx_events_task ON task_events(task_id, created_at); CREATE INDEX IF NOT EXISTS idx_runs_task ON task_runs(task_id, started_at); CREATE INDEX IF NOT EXISTS idx_runs_status ON task_runs(status); +CREATE INDEX IF NOT EXISTS idx_attachments_task ON task_attachments(task_id, created_at); CREATE INDEX IF NOT EXISTS idx_notify_task ON kanban_notify_subs(task_id); """ @@ -1637,6 +1704,140 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None: (new, old), ) + _rebuild_drifted_tables(conn) + + +# Legacy DBs defined these tables with a ``TEXT PRIMARY KEY`` id (or, for +# ``kanban_notify_subs``, a nullable ``TEXT last_event_id``). The current +# schema uses ``INTEGER PRIMARY KEY AUTOINCREMENT`` / ``INTEGER NOT NULL +# DEFAULT 0``. ``CREATE TABLE IF NOT EXISTS`` skips existing tables +# regardless of schema and ``_add_column_if_missing`` only adds columns, so +# neither can fix a drifted column type — the table must be rebuilt. See +# #35096. +# +# Each entry pairs the canonical CREATE TABLE with the CREATE INDEX +# statements that DROP TABLE would otherwise take down with it (including +# ``idx_events_run``, added by the additive pass above). To guard against +# this list drifting from SCHEMA_SQL, ``test_rebuilt_schema_matches_fresh`` +# asserts a rebuilt legacy DB is byte-identical to a fresh one. +_REBUILD_SPECS = { + "task_events": ( + "CREATE TABLE task_events (" + " id INTEGER PRIMARY KEY AUTOINCREMENT," + " task_id TEXT NOT NULL, run_id INTEGER, kind TEXT NOT NULL," + " payload TEXT, created_at INTEGER NOT NULL)", + ( + "CREATE INDEX idx_events_task ON task_events(task_id, created_at)", + "CREATE INDEX idx_events_run ON task_events(run_id, id)", + ), + ), + "task_comments": ( + "CREATE TABLE task_comments (" + " id INTEGER PRIMARY KEY AUTOINCREMENT," + " task_id TEXT NOT NULL, author TEXT NOT NULL, body TEXT NOT NULL," + " created_at INTEGER NOT NULL)", + ("CREATE INDEX idx_comments_task ON task_comments(task_id, created_at)",), + ), + "task_runs": ( + "CREATE TABLE task_runs (" + " id INTEGER PRIMARY KEY AUTOINCREMENT," + " task_id TEXT NOT NULL, profile TEXT, step_key TEXT," + " status TEXT NOT NULL, claim_lock TEXT, claim_expires INTEGER," + " worker_pid INTEGER, max_runtime_seconds INTEGER," + " last_heartbeat_at INTEGER, started_at INTEGER NOT NULL," + " ended_at INTEGER, outcome TEXT, summary TEXT, metadata TEXT," + " error TEXT)", + ( + "CREATE INDEX idx_runs_task ON task_runs(task_id, started_at)", + "CREATE INDEX idx_runs_status ON task_runs(status)", + ), + ), + "kanban_notify_subs": ( + "CREATE TABLE kanban_notify_subs (" + " task_id TEXT NOT NULL, platform TEXT NOT NULL, chat_id TEXT NOT NULL," + " thread_id TEXT NOT NULL DEFAULT '', user_id TEXT," + " notifier_profile TEXT, created_at INTEGER NOT NULL," + " last_event_id INTEGER NOT NULL DEFAULT 0," + " PRIMARY KEY (task_id, platform, chat_id, thread_id))", + ("CREATE INDEX idx_notify_task ON kanban_notify_subs(task_id)",), + ), +} + + +def _table_has_drifted(conn: sqlite3.Connection, table: str) -> bool: + """True when ``table`` still carries the legacy (pre-AUTOINCREMENT) shape.""" + info = conn.execute(f"PRAGMA table_info({table})").fetchall() + if not info: + return False # table absent — nothing to rebuild + if table == "kanban_notify_subs": + lei = next((c for c in info if c["name"] == "last_event_id"), None) + return lei is not None and (lei["type"] or "").upper() != "INTEGER" + # task_events / task_comments / task_runs: id must be INTEGER and a PK. + id_col = next((c for c in info if c["name"] == "id"), None) + if id_col is None: + return False + return not ((id_col["type"] or "").upper() == "INTEGER" and id_col["pk"]) + + +def _rebuild_drifted_tables(conn: sqlite3.Connection) -> None: + """Rebuild any kanban table whose column types drifted from SCHEMA_SQL. + + Old boards crash the gateway notifier (``int(None)`` on a NULL id in + ``unseen_events_for_sub``) and never match the ``id > cursor`` filter, so + every kanban notification is silently lost (#35096). Each affected table is + rebuilt with the standard SQLite pattern — CREATE new → INSERT shared + columns → DROP old → RENAME — recreating its indexes too (DROP TABLE takes + them down). The legacy TEXT ids are dropped (they aren't valid integers); + AUTOINCREMENT assigns fresh ones and ``last_event_id`` cursors reset to 0, + so the first post-migration tick replays a task's event history once — + the safe failure mode for a feature that was already fully broken. + + The whole pass runs in one transaction so an interruption can't leave a + table half-renamed, and under ``connect()``'s init locks so nothing races + it. Idempotent: a correctly-typed DB skips every table and returns without + opening a transaction. + """ + drifted = [t for t in _REBUILD_SPECS if _table_has_drifted(conn, t)] + if not drifted: + return + + conn.execute("BEGIN IMMEDIATE") + try: + for table in drifted: + create_sql, index_sqls = _REBUILD_SPECS[table] + old_cols = [c["name"] for c in conn.execute(f"PRAGMA table_info({table})")] + _log.info("kanban migration: rebuilding %s to match current schema", table) + conn.execute(f"ALTER TABLE {table} RENAME TO {table}_legacy") + conn.execute(create_sql) + new_cols = {c["name"] for c in conn.execute(f"PRAGMA table_info({table})")} + if table == "kanban_notify_subs": + # Cast the legacy TEXT cursor to INTEGER; NULL / non-numeric → 0. + shared = [c for c in old_cols if c in new_cols and c != "last_event_id"] + cols_csv = ", ".join(shared) + conn.execute( + f"INSERT INTO {table} ({cols_csv}, last_event_id) " + f"SELECT {cols_csv}, COALESCE(CAST(last_event_id AS INTEGER), 0) " + f"FROM {table}_legacy" + ) + else: + # Drop the legacy TEXT id; AUTOINCREMENT reassigns it. + shared = [c for c in old_cols if c in new_cols and c != "id"] + cols_csv = ", ".join(shared) + conn.execute( + f"INSERT INTO {table} ({cols_csv}) " + f"SELECT {cols_csv} FROM {table}_legacy" + ) + conn.execute(f"DROP TABLE {table}_legacy") + for index_sql in index_sqls: + conn.execute(index_sql) + conn.execute("COMMIT") + except Exception: + try: + conn.execute("ROLLBACK") + except sqlite3.OperationalError: + pass + raise + def _check_file_length_invariant(conn: sqlite3.Connection) -> None: """Read the SQLite header page_count and compare against actual file size. @@ -2252,6 +2453,121 @@ def list_comments(conn: sqlite3.Connection, task_id: str) -> list[Comment]: ] +# --------------------------------------------------------------------------- +# Attachments +# --------------------------------------------------------------------------- + +def add_attachment( + conn: sqlite3.Connection, + task_id: str, + *, + filename: str, + stored_path: str, + content_type: Optional[str] = None, + size: int = 0, + uploaded_by: Optional[str] = None, +) -> int: + """Record a file attachment for a task. Returns the new attachment id. + + The caller is responsible for writing the blob to ``stored_path`` + first (under :func:`task_attachments_dir`); this only persists the + metadata row and appends an ``attached`` event. + """ + if not filename or not filename.strip(): + raise ValueError("attachment filename is required") + if not stored_path or not stored_path.strip(): + raise ValueError("attachment stored_path is required") + now = int(time.time()) + with write_txn(conn): + if not conn.execute( + "SELECT 1 FROM tasks WHERE id = ?", (task_id,) + ).fetchone(): + raise ValueError(f"unknown task {task_id}") + cur = conn.execute( + "INSERT INTO task_attachments " + "(task_id, filename, stored_path, content_type, size, uploaded_by, created_at) " + "VALUES (?, ?, ?, ?, ?, ?, ?)", + ( + task_id, + filename.strip(), + stored_path, + content_type, + int(size), + uploaded_by, + now, + ), + ) + _append_event( + conn, + task_id, + "attached", + {"filename": filename.strip(), "size": int(size), "by": uploaded_by}, + ) + return int(cur.lastrowid or 0) + + +def list_attachments(conn: sqlite3.Connection, task_id: str) -> list[Attachment]: + rows = conn.execute( + "SELECT * FROM task_attachments WHERE task_id = ? ORDER BY created_at ASC, id ASC", + (task_id,), + ).fetchall() + return [ + Attachment( + id=r["id"], + task_id=r["task_id"], + filename=r["filename"], + stored_path=r["stored_path"], + content_type=r["content_type"], + size=r["size"] or 0, + uploaded_by=r["uploaded_by"], + created_at=r["created_at"], + ) + for r in rows + ] + + +def get_attachment(conn: sqlite3.Connection, attachment_id: int) -> Optional[Attachment]: + r = conn.execute( + "SELECT * FROM task_attachments WHERE id = ?", (attachment_id,) + ).fetchone() + if r is None: + return None + return Attachment( + id=r["id"], + task_id=r["task_id"], + filename=r["filename"], + stored_path=r["stored_path"], + content_type=r["content_type"], + size=r["size"] or 0, + uploaded_by=r["uploaded_by"], + created_at=r["created_at"], + ) + + +def delete_attachment(conn: sqlite3.Connection, attachment_id: int) -> Optional[Attachment]: + """Delete an attachment row and its on-disk blob. Returns the removed row. + + Returns ``None`` when no row matched. The blob is removed best-effort + (a missing file is not an error); the metadata row is the source of + truth for whether an attachment "exists". + """ + with write_txn(conn): + att = get_attachment(conn, attachment_id) + if att is None: + return None + conn.execute("DELETE FROM task_attachments WHERE id = ?", (attachment_id,)) + _append_event( + conn, att.task_id, "attachment_removed", {"filename": att.filename} + ) + try: + p = Path(att.stored_path) + if p.is_file(): + p.unlink() + except OSError: + pass + return att + + def list_events(conn: sqlite3.Connection, task_id: str) -> list[Event]: rows = conn.execute( "SELECT * FROM task_events WHERE task_id = ? ORDER BY created_at ASC, id ASC", @@ -2457,7 +2773,9 @@ def _has_sticky_block(conn: sqlite3.Connection, task_id: str) -> bool: return bool(row) and row["kind"] == "blocked" -def recompute_ready(conn: sqlite3.Connection) -> int: +def recompute_ready( + conn: sqlite3.Connection, failure_limit: int = None, +) -> int: """Promote ``todo`` tasks to ``ready`` when all parents are ``done`` or ``archived``. Returns the number of tasks promoted. Safe to call inside or outside @@ -2465,17 +2783,34 @@ def recompute_ready(conn: sqlite3.Connection) -> int: ``blocked`` tasks are also considered for promotion (so a task blocked purely by a parent dependency unblocks itself when the - parent completes), *except* when the most recent block event was a - worker-initiated ``kanban_block`` — those stay blocked until an - explicit ``kanban_unblock`` (#28712). Without that guard, a - ``review-required`` handoff would auto-respawn, the fresh worker - would find nothing to do, exit cleanly, get recorded as a protocol - violation, and the cycle would repeat indefinitely. + parent completes), *except* in two cases: + + 1. The most recent block event was a worker-initiated + ``kanban_block`` — those stay blocked until an explicit + ``kanban_unblock`` (#28712). + + 2. The task's ``consecutive_failures`` has reached the effective + failure limit. This prevents infinite retry loops when a task + repeatedly exhausts its iteration budget: without this guard the + counter would reset on every recovery cycle and the circuit + breaker could never trip (#35072). + + The effective failure limit resolves in the same order as the + circuit breaker in ``_record_task_failure`` so the two never + disagree about when a task is permanently blocked: + + 1. per-task ``max_retries`` if set + 2. caller-supplied ``failure_limit`` (the dispatcher passes the + ``kanban.failure_limit`` config value through ``dispatch_once``) + 3. ``DEFAULT_FAILURE_LIMIT`` """ + if failure_limit is None: + failure_limit = DEFAULT_FAILURE_LIMIT promoted = 0 with write_txn(conn): todo_rows = conn.execute( - "SELECT id, status FROM tasks WHERE status IN ('todo', 'blocked')" + "SELECT id, status, consecutive_failures, max_retries " + "FROM tasks WHERE status IN ('todo', 'blocked')" ).fetchall() for row in todo_rows: task_id = row["id"] @@ -2493,13 +2828,25 @@ def recompute_ready(conn: sqlite3.Connection) -> int: (task_id,), ).fetchall() if all(p["status"] in ("done", "archived") for p in parents): - # Blocked tasks also get their failure counters reset — - # this is effectively an auto-unblock (circuit-breaker - # recovery; worker-initiated blocks are skipped above). if cur_status == "blocked": + # Don't auto-recover tasks that have hit the + # circuit-breaker failure limit. Without this + # guard, a task that repeatedly exhausts its + # iteration budget would cycle forever: + # block → auto-recover → respawn → budget + # exhausted → block → … The counter must also + # be preserved so the breaker can accumulate + # across recovery cycles. + failures = int(row["consecutive_failures"] or 0) + task_limit = row["max_retries"] + effective_limit = ( + int(task_limit) if task_limit is not None + else int(failure_limit) + ) + if failures >= effective_limit: + continue conn.execute( - "UPDATE tasks SET status = 'ready', " - "consecutive_failures = 0, last_failure_error = NULL " + "UPDATE tasks SET status = 'ready' " "WHERE id = ? AND status = 'blocked'", (task_id,), ) @@ -5424,7 +5771,7 @@ def dispatch_once( if _crash_auto_blocked: result.auto_blocked.extend(_crash_auto_blocked) result.timed_out = enforce_max_runtime(conn) - result.promoted = recompute_ready(conn) + result.promoted = recompute_ready(conn, failure_limit=failure_limit) # Count tasks already running so max_spawn enforces concurrency rather # than a per-tick spawn budget. See the docstring above for the full @@ -6300,6 +6647,25 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str: lines.append(_cap(task.body, _CTX_MAX_BODY_BYTES)) lines.append("") + # Attachments — files uploaded to this task (PDFs, source docs, + # images). Surface the absolute on-disk path so the worker, which has + # full file-tool access, can read them directly (read_file, terminal + # `pdftotext`, etc.). On the local terminal backend the path resolves + # as-is; remote backends need the kanban attachments dir mounted. + attachments = list_attachments(conn, task_id) + if attachments: + lines.append("## Attachments") + lines.append( + "Files attached to this task. Read them with the file/terminal " + "tools at the absolute paths below:" + ) + for att in attachments: + size_kb = max(1, (att.size + 1023) // 1024) if att.size else 0 + size_str = f", {size_kb} KB" if size_kb else "" + ctype = f", {att.content_type}" if att.content_type else "" + lines.append(f"- `{att.filename}`{ctype}{size_str} → `{att.stored_path}`") + lines.append("") + # Prior attempts — show closed runs so a retrying worker sees the # history. Skip the currently-active run (that's this worker). # Cap at _CTX_MAX_PRIOR_ATTEMPTS most-recent closed runs; older diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 1f75e8bc8fe..4211a73dd8e 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -65,6 +65,46 @@ import os import sys +def _set_process_title() -> None: + """Set the process title to 'hermes' so tools like 'ps', 'top', and + 'htop' show the app name instead of 'python3.xx'. + + Purely cosmetic — non-fatal on any platform. + + Strategy (try in order): + 1. ``setproctitle`` (opt-in dep — installed via ``hermes tools`` or + ``pip install setproctitle``, or bundled in a future release). + 2. ctypes ``prctl(PR_SET_NAME)`` (Linux only, 15-char limit). + 3. ctypes ``pthread_setname_np`` (macOS only, kernel thread name — + changes lldb/top but not ``ps aux``). + 4. No-op on Windows (the .exe name is already ``hermes.exe``). + """ + # Strategy 1: setproctitle (best — works on macOS, Linux, BSD) + try: + import setproctitle # type: ignore[import-untyped] + + setproctitle.setproctitle("hermes") + return + except ImportError: + pass + + # Strategy 2/3: platform-specific ctypes fallback + import ctypes + import platform + + try: + system = platform.system() + if system == "Linux": + libc = ctypes.CDLL("libc.so.6", use_errno=True) + libc.prctl(15, b"hermes", 0, 0, 0) # PR_SET_NAME = 15 + elif system == "Darwin": + libc = ctypes.CDLL("libc.dylib", use_errno=True) + libc.pthread_setname_np(b"hermes") + # Windows: the .exe name is already ``hermes.exe`` — nothing to do. + except Exception: + pass + + # Mouse-tracking residue suppression — runs BEFORE every other import on the # TUI hot path so the terminal stops emitting SGR/X10 mouse reports while the # Python launcher is still doing imports (≈100–300ms in cooked + echo mode, @@ -2385,7 +2425,12 @@ def select_provider_and_model(args=None): if active == "openrouter" and get_env_value("OPENAI_BASE_URL"): active = "custom" - from hermes_cli.models import CANONICAL_PROVIDERS, _PROVIDER_LABELS + from hermes_cli.models import ( + CANONICAL_PROVIDERS, + _PROVIDER_LABELS, + group_providers, + provider_group_for_slug, + ) provider_labels = dict(_PROVIDER_LABELS) # derive from canonical list if active and active in _custom_provider_map: @@ -2398,8 +2443,43 @@ def select_provider_and_model(args=None): print(f" Active provider: {active_label}") print() - # Step 1: Provider selection — flat list from CANONICAL_PROVIDERS - all_providers = [(p.slug, p.tui_desc) for p in CANONICAL_PROVIDERS] + # Step 1: Provider selection. + # + # Canonical providers are folded into top-level groups (display only — see + # PROVIDER_GROUPS in hermes_cli/models.py). A multi-member group shows one + # row ("Kimi / Moonshot ▸"); picking it opens a member sub-picker that + # resolves back to a concrete slug, so the dispatch chain below is + # unchanged. Custom providers and the trailing actions stay flat. + canonical_descs = {p.slug: p.tui_desc for p in CANONICAL_PROVIDERS} + grouped_rows = group_providers([p.slug for p in CANONICAL_PROVIDERS]) + + # The group/slug that should be pre-selected: the active provider's group + # if it's grouped, otherwise the active slug itself. + active_group = provider_group_for_slug(active) if active else "" + + # ordered entries: (key, label, members) + # members == [] → leaf row, key is a provider slug / action + # members != [] → group row, key is "group:" + ordered: list[tuple[str, str, list[str]]] = [] + default_idx = 0 + for row in grouped_rows: + if row["kind"] == "group": + gid = row["group_id"] + label = f"{row['label']} ▸" + key = f"group:{gid}" + is_active = bool(active_group) and gid == active_group + members = row["members"] + else: + slug = row["slug"] + label = canonical_descs.get(slug, provider_labels.get(slug, slug)) + key = slug + is_active = bool(active) and slug == active + members = [] + if is_active: + ordered.append((key, f"{label} ← currently active", members)) + default_idx = len(ordered) - 1 + else: + ordered.append((key, label, members)) for key, provider_info in _custom_provider_map.items(): name = provider_info["name"] @@ -2407,36 +2487,49 @@ def select_provider_and_model(args=None): short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/") saved_model = provider_info.get("model", "") model_hint = f" — {saved_model}" if saved_model else "" - all_providers.append((key, f"{name} ({short_url}){model_hint}")) - - # Build the menu - ordered = [] - default_idx = 0 - for key, label in all_providers: + label = f"{name} ({short_url}){model_hint}" if active and key == active: - ordered.append((key, f"{label} ← currently active")) + ordered.append((key, f"{label} ← currently active", [])) default_idx = len(ordered) - 1 else: - ordered.append((key, label)) + ordered.append((key, label, [])) - ordered.append(("custom", "Custom endpoint (enter URL manually)")) + ordered.append(("custom", "Custom endpoint (enter URL manually)", [])) _has_saved_custom_list = isinstance(config.get("custom_providers"), list) and bool( config.get("custom_providers") ) if _has_saved_custom_list: - ordered.append(("remove-custom", "Remove a saved custom provider")) - ordered.append(("aux-config", "Configure auxiliary models...")) - ordered.append(("cancel", "Leave unchanged")) + ordered.append(("remove-custom", "Remove a saved custom provider", [])) + ordered.append(("aux-config", "Configure auxiliary models...", [])) + ordered.append(("cancel", "Leave unchanged", [])) provider_idx = _prompt_provider_choice( - [label for _, label in ordered], + [label for _, label, _ in ordered], default=default_idx, ) if provider_idx is None or ordered[provider_idx][0] == "cancel": print("No change.") return - selected_provider = ordered[provider_idx][0] + selected_key = ordered[provider_idx][0] + selected_members = ordered[provider_idx][2] + + # Group row → drill into a member sub-picker. Default to the active member + # if the active provider lives in this group. + if selected_members: + member_default = 0 + if active in selected_members: + member_default = selected_members.index(active) + member_labels = [ + canonical_descs.get(m, provider_labels.get(m, m)) for m in selected_members + ] + member_idx = _prompt_provider_choice(member_labels, default=member_default) + if member_idx is None: + print("No change.") + return + selected_provider = selected_members[member_idx] + else: + selected_provider = selected_key if selected_provider == "aux-config": _aux_config_menu() @@ -8008,39 +8101,6 @@ def _detect_concurrent_hermes_instances( except Exception: return [] - # Build a set of PIDs to exclude: the Python process itself plus its - # entire parent chain. On Windows the setuptools-generated hermes.exe - # launcher is a separate native process that spawns python.exe (the - # interpreter that runs our code). os.getpid() returns the Python PID, - # but the launcher (which holds the file lock) is the parent. Without - # walking the parent chain, every ``hermes update`` reports its own - # launcher as a concurrent instance — a false positive. - if exclude_pid is not None: - exclude_pids: set[int] = {exclude_pid} - else: - exclude_pids = {os.getpid()} - # The parent-walk is best-effort: if psutil rejects a PID (NoSuchProcess / - # AccessDenied) we stop walking and use whatever we've collected so far. - # Broader Exception catch on the outer block guards against partially- - # stubbed psutil in unit tests (e.g. a SimpleNamespace lacking Process / - # NoSuchProcess) — the surrounding update flow documents this helper as - # "never raises". - try: - current = psutil.Process(next(iter(exclude_pids))) - while True: - try: - parent = current.parent() - except Exception: - break - if parent is None or parent.pid <= 0: - break - if parent.pid in exclude_pids: - break # loop detected - exclude_pids.add(parent.pid) - current = parent - except Exception: - pass - # Resolve every shim path to its canonical form once for cheap comparison. shim_paths: set[str] = set() for shim in _hermes_exe_shims(scripts_dir): @@ -8051,6 +8111,56 @@ def _detect_concurrent_hermes_instances( if not shim_paths: return [] + # Build a set of PIDs to exclude: the Python process itself plus every + # ancestor whose executable is one of our shims. On Windows the + # setuptools-generated hermes.exe launcher is a separate native process + # that spawns python.exe (the interpreter that runs our code). + # os.getpid() returns the Python PID, but the launcher (which holds the + # file lock) is the parent. Without excluding it, every ``hermes update`` + # reports its own launcher as a concurrent instance — a false positive + # (issues #29341, #34795). + # + # Two robustness points learned from the field: + # 1. Use ``proc.parents()`` — it returns the WHOLE ancestor list in one + # call. The earlier per-hop ``current.parent()`` loop bailed on the + # first psutil error (AccessDenied/NoSuchProcess is common on Windows + # across session/elevation boundaries), leaving the launcher shim in + # the candidate set and re-triggering the false positive. + # 2. Only exclude ancestors whose exe is itself a shim. A genuine second + # hermes.exe sitting *under* a non-Hermes parent (e.g. a Hermes + # Desktop backend child) must still be flagged, so we don't blanket- + # exclude unrelated ancestors like the shell or terminal. + # Broad ``except Exception`` guards against partially-stubbed psutil in + # unit tests; this helper is documented as "never raises". + if exclude_pid is not None: + exclude_pids: set[int] = {int(exclude_pid)} + else: + exclude_pids = {os.getpid()} + try: + seed = next(iter(exclude_pids)) + try: + ancestors = psutil.Process(seed).parents() + except Exception: + ancestors = [] + for ancestor in ancestors: + try: + anc_exe = ancestor.exe() + except Exception: + continue + if not anc_exe: + continue + try: + anc_norm = str(Path(anc_exe).resolve()).lower() + except (OSError, ValueError): + anc_norm = str(anc_exe).lower() + if anc_norm in shim_paths: + try: + exclude_pids.add(int(ancestor.pid)) + except Exception: + continue + except Exception: + pass + matches: list[tuple[int, str]] = [] try: proc_iter = psutil.process_iter(["pid", "exe", "name"]) @@ -8091,6 +8201,13 @@ def _format_concurrent_instances_message( lines.append("") lines.append(" Close Hermes Desktop, exit any open `hermes` REPLs, and") lines.append(" stop the gateway (`hermes gateway stop`) before retrying.") + lines.append("") + if matches: + pid_args = " ".join(f"/PID {pid}" for pid, _ in matches) + lines.append(" If you've already closed everything and these PIDs are") + lines.append(" stale, terminate them directly, then retry the update:") + lines.append(f" taskkill {pid_args} /F") + lines.append("") lines.append(" Override with `hermes update --force` if you've already") lines.append(" confirmed those processes will not write to the venv.") return "\n".join(lines) @@ -9055,18 +9172,51 @@ def cmd_update(args): def _cmd_update_pip(args): """Update Hermes via pip (for PyPI installs).""" from hermes_cli import __version__ + from hermes_cli.config import is_uv_tool_install print(f"→ Current version: {__version__}") print("→ Checking PyPI for updates...") uv = shutil.which("uv") - if uv: + in_venv = sys.prefix != sys.base_prefix + # pipx-managed installs live under .../pipx/venvs//... + pipx_managed = "pipx" in sys.prefix.split(os.sep) + pipx = shutil.which("pipx") if pipx_managed else None + + # Only the ``uv pip install`` path inside a venv needs VIRTUAL_ENV + # exported (uv refuses to install without it when the launcher shim + # didn't activate the venv). ``uv tool upgrade`` / ``pipx upgrade`` + # operate on a named environment and ignore VIRTUAL_ENV, so we don't + # set it for them. + export_virtualenv = False + + if is_uv_tool_install(): + if not uv: + print("✗ Detected a uv-tool install but `uv` is not on PATH; install uv and retry.") + sys.exit(1) + cmd = [uv, "tool", "upgrade", "hermes-agent"] + elif pipx_managed and pipx: + # pipx owns its own venv; ``pipx upgrade`` is the only correct path. + # Matches scripts/auto-update.sh, which already uses pipx upgrade. + cmd = [pipx, "upgrade", "hermes-agent"] + elif uv: cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"] + if in_venv: + # Launcher shim runs the venv interpreter but doesn't export + # VIRTUAL_ENV; without it uv errors "No virtual environment found". + export_virtualenv = True + else: + # Outside any venv, ``--system`` lets uv target the active + # interpreter, matching pip's default behaviour. + cmd.insert(3, "--system") else: cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"] print(f"→ Running: {' '.join(cmd)}") - result = subprocess.run(cmd) + run_kwargs = {} + if export_virtualenv: + run_kwargs["env"] = {**os.environ, "VIRTUAL_ENV": sys.prefix} + result = subprocess.run(cmd, **run_kwargs) if result.returncode != 0: print("✗ Update failed") sys.exit(1) @@ -11157,6 +11307,13 @@ def cmd_completion(args, parser=None): print(generate_bash(parser)) +def cmd_prompt_size(args): + """Show a byte/char breakdown of the system prompt + tool schemas.""" + from hermes_cli.prompt_size import cmd_prompt_size as _impl + + _impl(args) + + def cmd_logs(args): """View and filter Hermes log files.""" from hermes_cli.logs import tail_log, list_logs @@ -11193,6 +11350,7 @@ _BUILTIN_SUBCOMMANDS = frozenset( "dump", "fallback", "gateway", "hooks", "import", "insights", "gui", "desktop", "kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate", "model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy", + "prompt-size", "send", "sessions", "setup", "skills", "slack", "status", "tools", "uninstall", "update", "version", "webhook", "whatsapp", "chat", "secrets", "security", @@ -11293,6 +11451,26 @@ _AGENT_SUBCOMMANDS = { } +def _is_tui_chat_launch(args) -> bool: + return bool(getattr(args, "tui", False) or os.environ.get("HERMES_TUI") == "1") + + +def _command_has_dedicated_mcp_startup(args) -> bool: + if args.command == "acp": + return True + if args.command == "gateway" and getattr(args, "gateway_command", None) == "run": + return True + if args.command == "cron" and getattr(args, "cron_command", None) in {"run", "tick"}: + return True + return False + + +def _should_background_mcp_startup(args) -> bool: + if _is_tui_chat_launch(args): + return False + return args.command in {None, "chat", "rl"} + + def _prepare_agent_startup(args) -> None: """Discover plugins/MCP/hooks for commands that can run an agent turn.""" _sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None)) @@ -11312,19 +11490,42 @@ def _prepare_agent_startup(args) -> None: "plugin discovery failed at CLI startup", exc_info=True, ) - try: - # MCP tool discovery — no event loop running in CLI/TUI startup, - # so inline is safe. Moved here from model_tools.py module scope - # to avoid freezing the gateway's event loop on its first message - # via the same lazy import path (#16856). - from tools.mcp_tool import discover_mcp_tools + _run_inline_mcp_discovery = True + if _is_tui_chat_launch(args): + # The TUI launcher hands off to a dedicated startup path that already + # backgrounds MCP discovery with a bounded join before the first tool + # snapshot. + _run_inline_mcp_discovery = False + elif _command_has_dedicated_mcp_startup(args): + # These entrypoints already do their own MCP startup later on the real + # runtime path (gateway executor, ACP launcher, cron job runner). + _run_inline_mcp_discovery = False + elif _should_background_mcp_startup(args): + try: + from hermes_cli.mcp_startup import start_background_mcp_discovery - discover_mcp_tools() - except Exception: - logger.debug( - "MCP tool discovery failed at CLI startup", - exc_info=True, - ) + start_background_mcp_discovery( + logger=logger, + thread_name="cli-mcp-discovery", + ) + except Exception: + logger.debug( + "Background MCP tool discovery failed at CLI startup", + exc_info=True, + ) + _run_inline_mcp_discovery = False + if _run_inline_mcp_discovery: + try: + # MCP tool discovery remains synchronous for entrypoints that do + # not own a later bounded/executor startup path. + from tools.mcp_tool import discover_mcp_tools + + discover_mcp_tools() + except Exception: + logger.debug( + "MCP tool discovery failed at CLI startup", + exc_info=True, + ) try: from hermes_cli.config import load_config from agent.shell_hooks import register_from_config @@ -11465,6 +11666,10 @@ def _try_termux_fast_tui_launch() -> bool: def main(): """Main entry point for hermes CLI.""" + # Cosmetic: make the process show up as 'hermes' instead of 'python3.11' + # in ps/top/htop. Non-fatal — just a nicer UX. + _set_process_title() + # Force UTF-8 stdio on Windows before anything prints. No-op elsewhere. try: from hermes_cli.stdio import configure_windows_stdio @@ -13218,9 +13423,15 @@ Examples: ), ) memory_sub = memory_parser.add_subparsers(dest="memory_command") - memory_sub.add_parser( + _setup_parser = memory_sub.add_parser( "setup", help="Interactive provider selection and configuration" ) + _setup_parser.add_argument( + "provider", + nargs="?", + default=None, + help="Provider to configure directly (e.g. honcho), skipping the picker", + ) memory_sub.add_parser("status", help="Show current memory provider config") memory_sub.add_parser("off", help="Disable external provider (built-in only)") _reset_parser = memory_sub.add_parser( @@ -14471,6 +14682,30 @@ Examples: ) logs_parser.set_defaults(func=cmd_logs) + # ========================================================================= + # prompt-size command + # ========================================================================= + prompt_size_parser = subparsers.add_parser( + "prompt-size", + help="Show a byte breakdown of the system prompt + tool schemas", + description=( + "Report the fixed prompt budget for a fresh session: system " + "prompt total, skills index, memory, user profile, and tool-schema " + "JSON. Runs offline (no API call)." + ), + ) + prompt_size_parser.add_argument( + "--platform", + default="cli", + help="Platform to simulate (cli, telegram, discord, ...). Default: cli", + ) + prompt_size_parser.add_argument( + "--json", + action="store_true", + help="Emit the breakdown as JSON", + ) + prompt_size_parser.set_defaults(func=cmd_prompt_size) + # ========================================================================= # Parse and execute # ========================================================================= diff --git a/hermes_cli/mcp_startup.py b/hermes_cli/mcp_startup.py new file mode 100644 index 00000000000..6d81853bca0 --- /dev/null +++ b/hermes_cli/mcp_startup.py @@ -0,0 +1,59 @@ +"""Shared CLI/TUI-safe helpers for background MCP discovery.""" + +from __future__ import annotations + +import threading +from typing import Optional + +_mcp_discovery_lock = threading.Lock() +_mcp_discovery_started = False +_mcp_discovery_thread: Optional[threading.Thread] = None + + +def _has_configured_mcp_servers() -> bool: + """Cheap config probe so non-MCP users avoid importing the MCP stack.""" + try: + from hermes_cli.config import read_raw_config + + mcp_servers = (read_raw_config() or {}).get("mcp_servers") + return isinstance(mcp_servers, dict) and len(mcp_servers) > 0 + except Exception: + # Be conservative: if config probing fails, try discovery in the + # background so startup still can't block. + return True + + +def start_background_mcp_discovery(*, logger, thread_name: str) -> None: + """Spawn one shared background MCP discovery thread for this process.""" + global _mcp_discovery_started, _mcp_discovery_thread + + with _mcp_discovery_lock: + if _mcp_discovery_started: + return + _mcp_discovery_started = True + if not _has_configured_mcp_servers(): + return + + def _discover() -> None: + try: + from tools.mcp_tool import discover_mcp_tools + + discover_mcp_tools() + except Exception: + logger.debug("Background MCP tool discovery failed", exc_info=True) + + thread = threading.Thread( + target=_discover, + name=thread_name, + daemon=True, + ) + _mcp_discovery_thread = thread + thread.start() + + +def wait_for_mcp_discovery(timeout: float = 0.75) -> None: + """Briefly wait for background MCP discovery before the first tool snapshot.""" + thread = _mcp_discovery_thread + if thread is None or not thread.is_alive(): + return + thread.join(timeout=timeout) diff --git a/hermes_cli/memory_setup.py b/hermes_cli/memory_setup.py index cac13bf781d..a75c10b0229 100644 --- a/hermes_cli/memory_setup.py +++ b/hermes_cli/memory_setup.py @@ -452,7 +452,11 @@ def memory_command(args) -> None: """Route memory subcommands.""" sub = getattr(args, "memory_command", None) if sub == "setup": - cmd_setup(args) + provider = getattr(args, "provider", None) + if provider: + cmd_setup_provider(provider) + else: + cmd_setup(args) elif sub == "status": cmd_status(args) else: diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 42eadfd7629..fba6ec94cfd 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -936,6 +936,105 @@ _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS} _PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider +# --------------------------------------------------------------------------- +# Provider groups — DISPLAY ONLY +# +# Some vendors expose several Hermes provider slugs (one per endpoint / +# auth method: global API, China API, OAuth coding plan, ...). Listing every +# slug as a top-level row in the interactive `hermes model` / setup wizard / +# Telegram `/model` pickers makes that list long and noisy. +# +# These groups fold related slugs under one top-level row in INTERACTIVE +# PICKERS only. They do NOT change ``CANONICAL_PROVIDERS``, slug identity, +# the ``--provider`` flag, ``/model ``, or any typed path — +# every member slug remains individually addressable. Grouping is a pure +# display affordance; ``group_providers()`` is the single fold used by all +# three picker surfaces so they stay consistent. +# +# group_id -> (display_label, [member_slug, ...]) +# +# Member order is the order shown inside the group submenu. +# --------------------------------------------------------------------------- +PROVIDER_GROUPS: dict[str, tuple[str, list[str]]] = { + "kimi": ("Kimi / Moonshot", ["kimi-coding", "kimi-coding-cn"]), + "minimax": ("MiniMax", ["minimax", "minimax-oauth", "minimax-cn"]), + "xai": ("xAI Grok", ["xai", "xai-oauth"]), + "google": ("Google Gemini", ["gemini", "google-gemini-cli"]), + "openai": ("OpenAI", ["openai-codex", "openai-api"]), + "opencode": ("OpenCode", ["opencode-zen", "opencode-go"]), + "copilot": ("GitHub Copilot", ["copilot", "copilot-acp"]), +} + +# Reverse index: member slug -> group_id. Built once at import. +_SLUG_TO_GROUP: dict[str, str] = { + slug: gid for gid, (_label, members) in PROVIDER_GROUPS.items() for slug in members +} + + +def provider_group_for_slug(slug: str) -> str: + """Return the group_id a provider slug belongs to, or "" if ungrouped.""" + return _SLUG_TO_GROUP.get(str(slug or "").strip().lower(), "") + + +def group_providers(slugs): + """Fold a flat ordered slug iterable into picker rows by provider group. + + DISPLAY ONLY. Used by every interactive picker (``hermes model``, the + setup wizard, the Telegram ``/model`` keyboard) so grouping is identical + across surfaces. + + Each returned row is a dict:: + + {"kind": "single", "slug": } # ungrouped, or + # 1-member group + {"kind": "group", "group_id": , "label":