Merge branch 'main' of github.com:NousResearch/hermes-agent into bb/gui

# Conflicts:
#	tui_gateway/server.py
This commit is contained in:
Brooklyn Nicholson 2026-05-30 13:19:27 -05:00
commit c83cd38391
157 changed files with 10059 additions and 831 deletions

View file

@ -1680,26 +1680,48 @@ def _read_main_provider() -> str:
# per turn — no lock needed. Cleared by ``clear_runtime_main()``.
_RUNTIME_MAIN_PROVIDER: str = ""
_RUNTIME_MAIN_MODEL: str = ""
_RUNTIME_MAIN_BASE_URL: str = ""
_RUNTIME_MAIN_API_KEY: str = ""
_RUNTIME_MAIN_API_MODE: str = ""
def set_runtime_main(provider: str, model: str) -> None:
"""Record the live runtime provider/model for the current AIAgent.
def set_runtime_main(
    provider: str,
    model: str,
    *,
    base_url: str = "",
    api_key: str = "",
    api_mode: str = "",
) -> None:
    """Record the live runtime provider/model/credentials for the current AIAgent.

    Called by ``run_agent.AIAgent._sync_runtime_main_for_aux_routing`` (or
    equivalent setter) at the top of each turn so that
    ``_read_main_provider`` / ``_read_main_model`` reflect CLI/gateway
    overrides instead of the stale config.yaml default.

    For ``custom:`` providers, ``base_url`` and ``api_key`` must also be
    recorded so that ``_resolve_auto`` can construct a valid client in
    Step 1 instead of falling through to the aggregator chain.

    Args:
        provider: Provider identifier; stored stripped and lower-cased.
        model: Model name; stored stripped.
        base_url: Endpoint URL; stored stripped.
        api_key: Credential; stored stripped, or ``""`` when not a ``str``.
        api_mode: API mode; stored stripped.

    Falsy values (``None``, ``""``) are recorded as ``""``.
    """
    global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL
    global _RUNTIME_MAIN_BASE_URL, _RUNTIME_MAIN_API_KEY, _RUNTIME_MAIN_API_MODE

    def _clean(value: object) -> str:
        # Falsy -> ""; a truthy non-str is coerced instead of raising
        # AttributeError on ``.strip()``.
        return str(value).strip() if value else ""

    # Normalise every value BEFORE touching the globals.  If normalisation
    # raised half-way through the assignments, the new provider/model would be
    # paired with the previous turn's base_url/api_key.
    new_provider = _clean(provider).lower()
    new_model = _clean(model)
    new_base_url = _clean(base_url)
    # A non-str api_key is dropped rather than stringified, so an arbitrary
    # object's repr is never recorded as a credential.
    new_api_key = api_key.strip() if isinstance(api_key, str) else ""
    new_api_mode = _clean(api_mode)

    _RUNTIME_MAIN_PROVIDER = new_provider
    _RUNTIME_MAIN_MODEL = new_model
    _RUNTIME_MAIN_BASE_URL = new_base_url
    _RUNTIME_MAIN_API_KEY = new_api_key
    _RUNTIME_MAIN_API_MODE = new_api_mode
def clear_runtime_main() -> None:
    """Reset all five recorded runtime values to the empty string.

    Intended for use when the override should no longer apply (e.g. on
    session end).
    """
    global _RUNTIME_MAIN_PROVIDER, _RUNTIME_MAIN_MODEL
    global _RUNTIME_MAIN_BASE_URL, _RUNTIME_MAIN_API_KEY, _RUNTIME_MAIN_API_MODE
    _RUNTIME_MAIN_PROVIDER = _RUNTIME_MAIN_MODEL = ""
    _RUNTIME_MAIN_BASE_URL = _RUNTIME_MAIN_API_KEY = _RUNTIME_MAIN_API_MODE = ""
def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str], Optional[str]]:
@ -2980,6 +3002,18 @@ def _resolve_auto(main_runtime: Optional[Dict[str, Any]] = None) -> Tuple[Option
runtime_api_key = runtime.get("api_key", "")
runtime_api_mode = str(runtime.get("api_mode") or "")
# Fall back to process-local globals when main_runtime dict was not
# provided or was incomplete. ``set_runtime_main()`` now records
# base_url/api_key/api_mode alongside provider/model, so custom:
# providers get the full credential surface in Step 1 of the
# auto-detect chain.
if not runtime_base_url and _RUNTIME_MAIN_BASE_URL:
runtime_base_url = _RUNTIME_MAIN_BASE_URL
if not runtime_api_key and _RUNTIME_MAIN_API_KEY:
runtime_api_key = _RUNTIME_MAIN_API_KEY
if not runtime_api_mode and _RUNTIME_MAIN_API_MODE:
runtime_api_mode = _RUNTIME_MAIN_API_MODE
# ── Warn once if OPENAI_BASE_URL is set but config.yaml uses a named
# provider (not 'custom'). This catches the common "env poisoning"
# scenario where a user switches providers via `hermes model` but the

View file

@ -1283,6 +1283,18 @@ def handle_max_iterations(agent, messages: list, api_call_count: int) -> str:
agent._copy_reasoning_content_for_api(msg, api_msg)
for internal_field in ("reasoning", "finish_reason", "_thinking_prefill"):
api_msg.pop(internal_field, None)
# Strict OpenAI-compatible gateways (Fireworks-backed OpenCode Go,
# Mistral, Moonshot/Kimi) reject any message key outside the Chat
# Completions schema. The main loop drops these via
# ChatCompletionsTransport.convert_messages(), but the summary path
# hand-builds messages and calls chat.completions.create() directly,
# bypassing the transport — so mirror that sanitization here:
# tool_name (SQLite FTS bookkeeping), the codex_* reasoning carriers,
# and every Hermes-internal underscore-prefixed scaffolding key.
for schema_foreign in ("tool_name", "codex_reasoning_items", "codex_message_items"):
api_msg.pop(schema_foreign, None)
for internal_key in [k for k in api_msg if isinstance(k, str) and k.startswith("_")]:
api_msg.pop(internal_key, None)
if _needs_sanitize:
agent._sanitize_tool_calls_for_strict_api(api_msg)
api_messages.append(api_msg)

View file

@ -40,17 +40,47 @@ SUMMARY_PREFIX = (
"window — treat it as background reference, NOT as active instructions. "
"Do NOT answer questions or fulfill requests mentioned in this summary; "
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"Respond ONLY to the latest user message that appears AFTER this "
"summary — that message is the single source of truth for what to do "
"right now. "
"If the latest user message is consistent with the '## Active Task' "
"section, you may use the summary as background. If the latest user "
"message contradicts, supersedes, changes topic from, or in any way "
"diverges from '## Active Task' / '## In Progress' / '## Pending User "
"Asks' / '## Remaining Work', the latest message WINS — discard those "
"stale items entirely and do not 'wrap up the old task first'. "
"Reverse signals in the latest message (e.g. 'stop', 'undo', 'roll "
"back', 'just verify', 'don't do that anymore', 'never mind', a new "
"topic) must immediately end any in-flight work described in the "
"summary; do not re-surface it in later turns. "
"IMPORTANT: Your persistent memory (MEMORY.md, USER.md) in the system "
"prompt is ALWAYS authoritative and active — never ignore or deprioritize "
"memory content due to this compaction note. "
"Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:"
"The current session state (files, config, etc.) may reflect work "
"described here — avoid repeating it:"
)
LEGACY_SUMMARY_PREFIX = "[CONTEXT SUMMARY]:"
# Handoff prefixes that shipped in earlier releases. A summary persisted under
# one of these can be inherited into a resumed lineage (#35344); when it is
# re-normalized on re-compaction we must strip the OLD prefix too, otherwise the
# stale directive it carried (e.g. "resume exactly from Active Task") survives
# embedded in the body and keeps hijacking replies. Keep newest-first; entries
# are matched literally. Add a frozen copy here whenever SUMMARY_PREFIX changes.
_HISTORICAL_SUMMARY_PREFIXES = (
# Pre-#35344: contained the self-contradicting "resume exactly" directive.
"[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
"into the summary below. This is a handoff from a previous context "
"window — treat it as background reference, NOT as active instructions. "
"Do NOT answer questions or fulfill requests mentioned in this summary; "
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:",
)
# Minimum tokens for the summary output
_MIN_SUMMARY_TOKENS = 2000
# Proportion of compressed content to allocate for summary
@ -1236,11 +1266,27 @@ Summary generation was unavailable, so this is a best-effort deterministic fallb
# Shared structured template (used by both paths).
_template_sections = f"""## Active Task
[THE SINGLE MOST IMPORTANT FIELD. Copy the user's most recent request or
task assignment verbatim the exact words they used. If multiple tasks
were requested and only some are done, list only the ones NOT yet completed.
Continuation should pick up exactly here. Example:
[THE SINGLE MOST IMPORTANT FIELD. Capture the user's most recent unfulfilled
input verbatim the exact words they used. This includes:
- Explicit task assignments ("refactor the auth module")
- Questions awaiting an answer ("waarom staat X op Y?", "wat zijn de volgende stappen?")
- Decisions awaiting input ("optie A of B?")
- Ongoing discussions where the assistant owes the next substantive reply
A conversation where the user just asked a question IS an active task the
task is "answer that question with full context". Do NOT write "None" merely
because the user did not issue an imperative command; reserve "None" for the
rare case where the last exchange was fully resolved and the user said
something like "thanks, that's all".
If multiple items are outstanding, list only the ones NOT yet completed.
Continuation should pick up exactly here. Examples:
"User asked: 'Now refactor the auth module to use JWT instead of sessions'"
"User asked: 'Waarom stond provider ineens op openrouter?' — needs investigation + answer"
"User chose option A; awaiting implementation of step 2"
If the user's most recent message was a reverse signal (stop, undo, roll
back, never mind, just verify, change of topic) that supersedes earlier
work, write the reverse signal verbatim and DO NOT carry forward the
cancelled task. Example: "User asked: 'Stop the i18n refactor and just
verify the current diff' — earlier i18n in-flight work is cancelled."
If no outstanding task exists, write "None."]
## Goal
@ -1306,7 +1352,7 @@ PREVIOUS SUMMARY:
NEW TURNS TO INCORPORATE:
{content_to_summarize}
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled request — this is the most important field for task continuity.
Update the summary using this exact structure. PRESERVE all existing information that is still relevant. ADD new completed actions to the numbered list (continue numbering). Move items from "In Progress" to "Completed Actions" when done. Move answered questions to "Resolved Questions". Update "Active State" to reflect current state. Remove information only if it is clearly obsolete. CRITICAL: Update "## Active Task" to reflect the user's most recent unfulfilled input — this includes any question, decision request, or discussion turn that the assistant has not yet answered. Only write "None" if the last exchange was fully resolved.
{_template_sections}"""
else:
@ -1470,9 +1516,16 @@ The user has requested that this compaction PRIORITISE preserving all informatio
@staticmethod
def _strip_summary_prefix(summary: str) -> str:
    """Return the summary body with a leading handoff prefix removed.

    The current prefix, the legacy prefix, and every historical prefix are
    tried in that order; only the first one that matches is removed.

    Historical prefixes are included because a handoff persisted under an
    older prefix can be inherited into a resumed lineage (#35344); if only
    the current prefix were re-prepended without removing the old one, the
    stale directive it carried would stay embedded in the body.
    """
    body = (summary or "").strip()
    known_prefixes = (
        SUMMARY_PREFIX,
        LEGACY_SUMMARY_PREFIX,
        *_HISTORICAL_SUMMARY_PREFIXES,
    )
    matched = next((p for p in known_prefixes if body.startswith(p)), None)
    if matched is None:
        return body
    return body[len(matched):].lstrip()
@ -1486,7 +1539,9 @@ The user has requested that this compaction PRIORITISE preserving all informatio
@staticmethod
def _is_context_summary_content(content: Any) -> bool:
    """Tell whether ``content`` begins with any known handoff prefix.

    The text obtained from ``_content_text_for_contains`` is left-stripped and
    compared against the current, legacy, and historical summary prefixes.
    """
    head = _content_text_for_contains(content).lstrip()
    # str.startswith accepts a tuple, so one call covers every prefix.
    return head.startswith(
        (SUMMARY_PREFIX, LEGACY_SUMMARY_PREFIX, *_HISTORICAL_SUMMARY_PREFIXES)
    )
@classmethod
def _find_latest_context_summary(

View file

@ -115,6 +115,15 @@ class ContextEngine(ABC):
"""
return False
def should_defer_preflight_to_real_usage(self, rough_tokens: int) -> bool:
    """Say whether preflight should rely on recent real usage instead.

    This base implementation ignores ``rough_tokens`` and always answers
    ``False``.  Built-in compression uses this hook to avoid re-compacting
    from known-noisy rough estimates after a compressed request has already
    fit; third-party engines can safely leave it as is.
    """
    return False
# -- Optional: manual /compress preflight ------------------------------
def has_content_to_compress(self, messages: List[Dict[str, Any]]) -> bool:

View file

@ -575,19 +575,18 @@ def compress_context(
force=True,
)
# Update token estimate after compaction so pressure calculations
# use the post-compression count, not the stale pre-compression one.
# Use estimate_request_tokens_rough() so tool schemas are included —
# with 50+ tools enabled, schemas alone can add 20-30K tokens, and
# omitting them delays the next compression cycle far past the
# configured threshold (issue #14695).
# Keep the post-compression rough estimate for diagnostics, but do not
# treat it as provider-reported prompt usage. Schema-heavy rough estimates
# can remain above threshold even after the next real API request fits.
_compressed_est = estimate_request_tokens_rough(
compressed,
system_prompt=new_system_prompt or "",
tools=agent.tools or None,
)
agent.context_compressor.last_prompt_tokens = _compressed_est
agent.context_compressor.last_compression_rough_tokens = _compressed_est
agent.context_compressor.last_prompt_tokens = -1
agent.context_compressor.last_completion_tokens = 0
agent.context_compressor.awaiting_real_usage_after_compression = True
# Clear the file-read dedup cache. After compression the original
# read content is summarised away — if the model re-reads the same
@ -599,7 +598,7 @@ def compress_context(
pass
logger.info(
"context compression done: session=%s messages=%d->%d tokens=~%s",
"context compression done: session=%s messages=%d->%d rough_tokens=~%s awaiting_real_usage=true",
agent.session_id or "none", _pre_msg_count, len(compressed),
f"{_compressed_est:,}",
)

View file

@ -392,6 +392,9 @@ def run_conversation(
set_runtime_main(
getattr(agent, "provider", "") or "",
getattr(agent, "model", "") or "",
base_url=getattr(agent, "base_url", "") or "",
api_key=getattr(agent, "api_key", "") or "",
api_mode=getattr(agent, "api_mode", "") or "",
)
except Exception:
pass
@ -600,18 +603,50 @@ def run_conversation(
system_prompt=active_system_prompt or "",
tools=agent.tools or None,
)
_compressor = agent.context_compressor
_defer_preflight = getattr(
_compressor,
"should_defer_preflight_to_real_usage",
lambda _tokens: False,
)
_preflight_deferred = _defer_preflight(_preflight_tokens)
if agent.context_compressor.should_compress(_preflight_tokens):
if not _preflight_deferred:
# Keep the CLI/ACP context display in sync with what preflight
# actually measured. The status bar reads
# ``compressor.last_prompt_tokens``, which otherwise only updates
# from a *successful* API response. When the conversation has grown
# since the last successful call — or when compression then fails
# (e.g. the auxiliary summary model times out) and no fresh usage
# arrives — the bar stays stuck at the old, smaller value while
# preflight reports a much larger number, looking out of sync.
# Seed it with the fresh estimate (only ever revising upward; a real
# ``update_from_response`` will correct it after the next API call).
# Skipped when deferring — a deferred estimate is known to over-count
# vs the last real provider prompt, so trusting it for the display
# would re-introduce the very desync we're avoiding.
if _preflight_tokens > (_compressor.last_prompt_tokens or 0):
_compressor.last_prompt_tokens = _preflight_tokens
if _preflight_deferred:
logger.info(
"Skipping preflight compression: rough estimate ~%s >= %s, "
"but last real provider prompt was %s after compression",
f"{_preflight_tokens:,}",
f"{_compressor.threshold_tokens:,}",
f"{_compressor.last_real_prompt_tokens:,}",
)
elif _compressor.should_compress(_preflight_tokens):
logger.info(
"Preflight compression: ~%s tokens >= %s threshold (model %s, ctx %s)",
f"{_preflight_tokens:,}",
f"{agent.context_compressor.threshold_tokens:,}",
f"{_compressor.threshold_tokens:,}",
agent.model,
f"{agent.context_compressor.context_length:,}",
f"{_compressor.context_length:,}",
)
agent._emit_status(
f"📦 Preflight compression: ~{_preflight_tokens:,} tokens "
f">= {agent.context_compressor.threshold_tokens:,} threshold. "
f">= {_compressor.threshold_tokens:,} threshold. "
"This may take a moment."
)
# May need multiple passes for very large sessions with small
@ -646,8 +681,8 @@ def run_conversation(
system_prompt=active_system_prompt or "",
tools=agent.tools or None,
)
if _preflight_tokens < agent.context_compressor.threshold_tokens:
break # Under threshold
if not _compressor.should_compress(_preflight_tokens):
break # Under threshold or anti-thrash guard stopped it
# Plugin hook: pre_llm_call
# Fired once per turn before the tool-calling loop. Plugins can
@ -1457,7 +1492,8 @@ def run_conversation(
if retry_count >= max_retries:
# Try fallback before giving up
agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
if agent._has_pending_fallback():
agent._buffer_status(f"⚠️ Max retries ({max_retries}) for invalid responses — trying fallback...")
if agent._try_activate_fallback():
retry_count = 0
compression_attempts = 0
@ -3059,12 +3095,17 @@ def run_conversation(
) and not is_context_length_error
if is_client_error:
# Try fallback before aborting — a different provider
# may not have the same issue (rate limit, auth, etc.)
if classified.reason == FailoverReason.content_policy_blocked:
agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
else:
agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
# Try fallback before aborting — a different provider may
# not have the same issue (rate limit, auth, etc.). Only
# announce the attempt when a fallback chain actually
# exists; otherwise "trying fallback..." is a lie and the
# session looks like it's recovering when it's about to
# abort silently (#35314, #17446).
if agent._has_pending_fallback():
if classified.reason == FailoverReason.content_policy_blocked:
agent._buffer_status("⚠️ Provider safety filter blocked this request — trying fallback...")
else:
agent._buffer_status(f"⚠️ Non-retryable error (HTTP {status_code}) — trying fallback...")
if agent._try_activate_fallback():
retry_count = 0
compression_attempts = 0
@ -3207,7 +3248,8 @@ def run_conversation(
retry_count = 0
continue
# Try fallback before giving up entirely
agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
if agent._has_pending_fallback():
agent._buffer_status(f"⚠️ Max retries ({max_retries}) exhausted — trying fallback...")
if agent._try_activate_fallback():
retry_count = 0
compression_attempts = 0
@ -3862,6 +3904,11 @@ def run_conversation(
# inflate completion_tokens with reasoning,
# causing premature compression. (#12026)
_real_tokens = _compressor.last_prompt_tokens
elif _compressor.last_prompt_tokens == -1:
# Compression just ran and no API-reported prompt count
# has arrived yet. Avoid treating a schema-heavy rough
# post-compression estimate as real context pressure.
_real_tokens = 0
else:
# Include tool schemas — with 50+ tools enabled
# these add 20-30K tokens the messages-only
@ -4443,6 +4490,55 @@ def run_conversation(
except Exception as _ver_err:
logger.debug("file-mutation verifier footer failed: %s", _ver_err)
# Turn-completion explainer.
# When a turn ends abnormally after substantive work — empty content
# after retries, a partial/truncated stream, a still-pending tool
# result, or an iteration/budget limit — the user otherwise gets a
# blank or fragmentary response box with no consolidated reason why
# the agent stopped (#34452). Surface a single user-visible
# explanation derived from ``_turn_exit_reason``, mirroring the
# file-mutation verifier footer pattern above.
#
# Gate carefully so healthy turns stay quiet:
# - ``text_response(...)`` exits never produce an explanation
# (handled inside the formatter), so a terse ``Done.`` is silent.
# - We only ACT when there is no genuinely usable reply this turn:
# an empty response, the "(empty)" terminal sentinel, or a
# suspiciously short partial fragment with no terminating
# punctuation (e.g. "The"). A real short answer keeps its text.
if not interrupted:
try:
if agent._turn_completion_explainer_enabled():
_stripped = (final_response or "").strip()
_is_empty_terminal = _stripped == "" or _stripped == "(empty)"
# A short fragment that is not a normal text_response exit
# and lacks sentence-ending punctuation is treated as a
# truncated partial (the "The" case from #34452).
_is_partial_fragment = (
not _is_empty_terminal
and not str(_turn_exit_reason).startswith("text_response")
and len(_stripped) <= 24
and _stripped[-1:] not in {".", "!", "?", "", "", "", "`", ")"}
)
if _is_empty_terminal or _is_partial_fragment:
_explanation = agent._format_turn_completion_explanation(
_turn_exit_reason
)
if _explanation:
if _is_empty_terminal:
# Replace the bare "(empty)"/blank sentinel with
# the actionable explanation.
final_response = _explanation
else:
# Keep the partial fragment, append the reason so
# the user sees both what arrived and why it
# stopped.
final_response = (
_stripped + "\n\n" + _explanation
)
except Exception as _exp_err:
logger.debug("turn-completion explainer failed: %s", _exp_err)
_response_transformed = False
# Plugin hook: transform_llm_output

View file

@ -247,18 +247,13 @@ def _cmd_restart() -> int:
def _cmd_which(server_id: str) -> int:
    """Print the resolved binary path for ``server_id``.

    The binary name comes from the server's install recipe (``"bin"`` key),
    falling back to ``server_id`` itself when there is no recipe or no such
    key.  Returns 0 after writing the path to stdout, or 1 after writing a
    "not installed" message to stderr.
    """
    from agent.lsp.install import INSTALL_RECIPES, _existing_binary

    recipe = INSTALL_RECIPES.get(server_id) or {}
    binary_path = _existing_binary(recipe.get("bin", server_id))
    if not binary_path:
        sys.stderr.write(f"{server_id}: not installed\n")
        return 1
    sys.stdout.write(binary_path + "\n")
    return 0
@ -292,11 +287,9 @@ def _backend_warnings() -> list:
suggestion across common platforms.
"""
import shutil as _shutil
from agent.lsp.install import hermes_lsp_bin_dir
from agent.lsp.install import _existing_binary
notes: list = []
bash_installed = _shutil.which("bash-language-server") is not None or (
(hermes_lsp_bin_dir() / "bash-language-server").exists()
)
bash_installed = _existing_binary("bash-language-server") is not None
if bash_installed and _shutil.which("shellcheck") is None:
notes.append(
"bash-language-server is installed but shellcheck is missing — "

View file

@ -44,6 +44,7 @@ from __future__ import annotations
import asyncio
import logging
import os
import sys
from pathlib import Path
from typing import Any, Awaitable, Callable, Dict, List, Optional, Set
from urllib.parse import quote, unquote
@ -244,15 +245,27 @@ class LSPClient:
await self._cleanup_process()
raise
@staticmethod
def _win_wrap_cmd(cmd: List[str]) -> List[str]:
"""On Windows, wrap .cmd/.bat shims so CreateProcess can run them."""
exe = cmd[0]
if exe.lower().endswith((".cmd", ".bat")):
return ["cmd.exe", "/c", *cmd]
return cmd
async def _spawn(self) -> None:
env = dict(os.environ)
if self._env:
env.update(self._env)
cmd = self._command
if sys.platform == "win32":
cmd = self._win_wrap_cmd(cmd)
try:
self._proc = await asyncio.create_subprocess_exec(
self._command[0],
*self._command[1:],
cmd[0],
*cmd[1:],
stdin=asyncio.subprocess.PIPE,
stdout=asyncio.subprocess.PIPE,
stderr=asyncio.subprocess.PIPE,
@ -261,7 +274,7 @@ class LSPClient:
)
except FileNotFoundError as e:
raise LSPProtocolError(
f"LSP server binary not found: {self._command[0]} ({e})"
f"LSP server binary not found: {cmd[0]} ({e})"
) from e
# Drain stderr at debug level — if we don't, the pipe buffer

View file

@ -108,6 +108,11 @@ INSTALL_RECIPES: Dict[str, Dict[str, Any]] = {
_install_locks: Dict[str, threading.Lock] = {}
_install_results: Dict[str, Optional[str]] = {}
_install_lock_meta = threading.Lock()
_WINDOWS_WRAPPER_SUFFIXES = (".cmd", ".exe", ".bat")
def _is_windows() -> bool:
    """Return True when ``os.name`` is ``"nt"``."""
    return os.name == "nt"
def hermes_lsp_bin_dir() -> Path:
@ -120,14 +125,33 @@ def hermes_lsp_bin_dir() -> Path:
return p
def _native_binary_candidates(base: Path) -> list[Path]:
    """List the paths to probe for a staged binary at ``base``.

    ``base`` itself always comes first.  On Windows each suffix in
    ``_WINDOWS_WRAPPER_SUFFIXES`` is appended to it in turn, skipping any
    candidate whose lower-cased path was already produced.
    """
    if not _is_windows():
        return [base]

    found = [base]
    seen = {str(base).lower()}
    for ext in _WINDOWS_WRAPPER_SUFFIXES:
        wrapped = Path(str(base) + ext)
        folded = str(wrapped).lower()
        if folded in seen:
            continue
        seen.add(folded)
        found.append(wrapped)
    return found
def _existing_binary(name: str) -> Optional[str]:
    """Locate ``name`` in the staging dir first, then on PATH.

    Staged candidates must both exist and be executable.  The PATH lookup
    tries the bare name and, on Windows, the name with each suffix in
    ``_WINDOWS_WRAPPER_SUFFIXES``.  Returns the first hit as a string, or
    ``None`` when nothing is found.
    """
    staged_base = hermes_lsp_bin_dir() / name
    for candidate in _native_binary_candidates(staged_base):
        if candidate.exists() and os.access(candidate, os.X_OK):
            return str(candidate)

    lookup_names = [name]
    if _is_windows():
        lookup_names.extend(f"{name}{ext}" for ext in _WINDOWS_WRAPPER_SUFFIXES)
    for lookup in lookup_names:
        hit = shutil.which(lookup)
        if hit:
            return hit
    return None
@ -250,12 +274,7 @@ def _install_npm(
# Find the bin
nm_bin = staging / "node_modules" / ".bin" / bin_name
if os.name == "nt":
# On Windows npm sometimes drops `.cmd` shims
candidates = [nm_bin, nm_bin.with_suffix(".cmd")]
else:
candidates = [nm_bin]
for c in candidates:
for c in _native_binary_candidates(nm_bin):
if c.exists():
# Symlink into our `lsp/bin/` for stable PATH access.
link = hermes_lsp_bin_dir() / c.name
@ -301,7 +320,7 @@ def _install_go(pkg: str, bin_name: str) -> Optional[str]:
logger.warning("[install] go install errored for %s: %s", pkg, e)
return None
bin_path = staging / bin_name
if os.name == "nt":
if _is_windows():
bin_path = bin_path.with_suffix(".exe")
if bin_path.exists():
return str(bin_path)
@ -337,19 +356,24 @@ def _install_pip(pkg: str, bin_name: str) -> Optional[str]:
except (subprocess.TimeoutExpired, OSError) as e:
logger.warning("[install] pip install errored for %s: %s", pkg, e)
return None
# Look for the script
bin_path = pip_target / "bin" / bin_name
if bin_path.exists():
link = hermes_lsp_bin_dir() / bin_name
if not link.exists():
try:
link.symlink_to(bin_path)
except (OSError, NotImplementedError):
try:
shutil.copy2(bin_path, link)
except OSError:
return str(bin_path)
return str(link if link.exists() else bin_path)
# Look for the console script. POSIX wheels generally write to bin/,
# while native Windows installs use Scripts/.
script_dirs = [pip_target / "bin"]
if _is_windows():
script_dirs.append(pip_target / "Scripts")
for script_dir in script_dirs:
for bin_path in _native_binary_candidates(script_dir / bin_name):
if bin_path.exists():
link = hermes_lsp_bin_dir() / bin_path.name
if not link.exists():
try:
link.symlink_to(bin_path)
except (OSError, NotImplementedError):
try:
shutil.copy2(bin_path, link)
except OSError:
return str(bin_path)
return str(link if link.exists() else bin_path)
return None

View file

@ -180,28 +180,9 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
except Exception:
pass
# Checkpoint for file-mutating tools
if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
try:
file_path = function_args.get("path", "")
if file_path:
work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
except Exception:
pass
# Checkpoint before destructive terminal commands
if function_name == "terminal" and agent._checkpoint_mgr.enabled:
try:
cmd = function_args.get("command", "")
if _is_destructive_command(cmd):
cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
agent._checkpoint_mgr.ensure_checkpoint(
cwd, f"before terminal: {cmd[:60]}"
)
except Exception:
pass
# ── Block evaluation (BEFORE checkpoint preflight) ───────────
# We must know whether the tool will execute before touching
# checkpoint state (dedup slot, real snapshots).
block_result = None
blocked_by_guardrail = False
if _ts_scope_block is not None:
@ -224,6 +205,30 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
block_result = agent._guardrail_block_result(guardrail_decision)
blocked_by_guardrail = True
# ── Checkpoint preflight (only for tools that will execute) ──
if block_result is None:
# Checkpoint for file-mutating tools
if function_name in {"write_file", "patch"} and agent._checkpoint_mgr.enabled:
try:
file_path = function_args.get("path", "")
if file_path:
work_dir = agent._checkpoint_mgr.get_working_dir_for_path(file_path)
agent._checkpoint_mgr.ensure_checkpoint(work_dir, f"before {function_name}")
except Exception:
pass
# Checkpoint before destructive terminal commands
if function_name == "terminal" and agent._checkpoint_mgr.enabled:
try:
cmd = function_args.get("command", "")
if _is_destructive_command(cmd):
cwd = function_args.get("workdir") or os.getenv("TERMINAL_CWD", os.getcwd())
agent._checkpoint_mgr.ensure_checkpoint(
cwd, f"before terminal: {cmd[:60]}"
)
except Exception:
pass
parsed_calls.append((tool_call, function_name, function_args, block_result, blocked_by_guardrail))
# ── Logging / callbacks ──────────────────────────────────────────
@ -301,33 +306,38 @@ def execute_tool_calls_concurrent(agent, assistant_message, messages: list, effe
# submit site below (GHSA-qg5c-hvr5-hjgr, #13617).
start = time.time()
try:
result = agent._invoke_tool(
function_name,
function_args,
effective_task_id,
tool_call.id,
messages=messages,
pre_tool_block_checked=True,
)
except Exception as tool_error:
result = f"Error executing tool '{function_name}': {tool_error}"
logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
duration = time.time() - start
is_error, _ = _detect_tool_failure(function_name, result)
if is_error:
logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
else:
logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
results[index] = (function_name, function_args, result, duration, is_error, False)
# Tear down worker-tid tracking. Clear any interrupt bit we may
# have set so the next task scheduled onto this recycled tid
# starts with a clean slate.
with agent._tool_worker_threads_lock:
agent._tool_worker_threads.discard(_worker_tid)
try:
_ra()._set_interrupt(False, _worker_tid)
except Exception:
pass
try:
result = agent._invoke_tool(
function_name,
function_args,
effective_task_id,
tool_call.id,
messages=messages,
pre_tool_block_checked=True,
)
except Exception as tool_error:
result = f"Error executing tool '{function_name}': {tool_error}"
logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True)
duration = time.time() - start
is_error, _ = _detect_tool_failure(function_name, result)
if is_error:
logger.info("tool %s failed (%.2fs): %s", function_name, duration, result[:200])
else:
logger.info("tool %s completed (%.2fs, %d chars)", function_name, duration, len(result))
results[index] = (function_name, function_args, result, duration, is_error, False)
finally:
# Tear down worker-tid tracking. Clear any interrupt bit we may
# have set so the next task scheduled onto this recycled tid
# starts with a clean slate. This MUST be in a finally block
# because BaseException subclasses (CancelledError, KeyboardInterrupt)
# bypass ``except Exception`` and would otherwise leak the tid
# into _interrupted_threads, poisoning the recycled thread.
with agent._tool_worker_threads_lock:
agent._tool_worker_threads.discard(_worker_tid)
try:
_ra()._set_interrupt(False, _worker_tid)
except Exception:
pass
# Start spinner for CLI mode (skip when TUI handles tool progress)
spinner = None
@ -753,10 +763,14 @@ def execute_tool_calls_sequential(agent, assistant_message, messages: list, effe
elif function_name == "delegate_task":
tasks_arg = function_args.get("tasks")
if tasks_arg and isinstance(tasks_arg, list):
spinner_label = f"🔀 delegating {len(tasks_arg)} tasks"
spinner_label = f"🔀 delegating {len(tasks_arg)} tasks · (/agents to monitor)"
else:
goal_preview = (function_args.get("goal") or "")[:30]
spinner_label = f"🔀 {goal_preview}" if goal_preview else "🔀 delegating"
spinner_label = (
f"🔀 {goal_preview} · (/agents to monitor)"
if goal_preview
else "🔀 delegating · (/agents to monitor)"
)
spinner = None
if agent._should_emit_quiet_tool_messages() and agent._should_start_quiet_spinner():
face = random.choice(KawaiiSpinner.get_waiting_faces())

40
cli.py
View file

@ -787,8 +787,10 @@ def AIAgent(*args, **kwargs):
def get_tool_definitions(*args, **kwargs):
    """Forward to ``model_tools.get_tool_definitions`` after the MCP discovery wait.

    Both imports are resolved at call time rather than at module import.
    ``wait_for_mcp_discovery()`` is called first; every positional and
    keyword argument is then passed through unchanged and the delegate's
    return value is returned as-is.
    """
    from hermes_cli.mcp_startup import wait_for_mcp_discovery
    from model_tools import get_tool_definitions as _delegate

    wait_for_mcp_discovery()
    return _delegate(*args, **kwargs)
@ -896,9 +898,12 @@ def _prepare_deferred_agent_startup() -> None:
exc_info=True,
)
try:
from tools.mcp_tool import discover_mcp_tools
from hermes_cli.mcp_startup import start_background_mcp_discovery
discover_mcp_tools()
start_background_mcp_discovery(
logger=logger,
thread_name="termux-cli-mcp-discovery",
)
except Exception:
logger.debug(
"MCP tool discovery failed at deferred CLI startup",
@ -1537,9 +1542,17 @@ def _query_osc11_background() -> str | None:
Most modern terminals reply with \x1b]11;rgb:RRRR/GGGG/BBBB\x1b\\
within a few ms. We wait up to 100ms total before giving up.
Returns "#RRGGBB" or None on timeout / non-tty.
Skipped over SSH: the round-trip routinely exceeds our 100ms budget, so a
late reply lands after prompt_toolkit has grabbed the tty — its payload
leaks in as typed text and the BEL terminator reads as Ctrl+G (open
editor), trapping the user in a stray editor. Remote sessions fall back to
COLORFGBG / env hints / the dark default instead.
"""
if not sys.stdin.isatty() or not sys.stdout.isatty():
return None
if any(os.environ.get(v) for v in ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY")):
return None
try:
import termios
import tty
@ -1587,8 +1600,11 @@ def _query_osc11_background() -> str | None:
r, g, b = norm(m.group(1)), norm(m.group(2)), norm(m.group(3))
return f"#{r:02X}{g:02X}{b:02X}"
finally:
# TCSAFLUSH discards any unread input as it restores the original
# attributes — scrubs a slow/partial OSC 11 reply out of the tty
# buffer before prompt_toolkit can read it as keystrokes.
try:
termios.tcsetattr(fd, termios.TCSANOW, old)
termios.tcsetattr(fd, termios.TCSAFLUSH, old)
except Exception:
pass
@ -4872,6 +4888,10 @@ class HermesCLI:
if not self._ensure_runtime_credentials():
return False
from hermes_cli.mcp_startup import wait_for_mcp_discovery
wait_for_mcp_discovery()
# Initialize SQLite session store for CLI sessions (if not already done in __init__)
if self._session_db is None:
try:
@ -12928,6 +12948,13 @@ class HermesCLI:
if event.app.is_running:
event.app.exit()
event.app.current_buffer.reset(append_to_history=True)
# Force a repaint: process_command() prints through
# patch_stdout (scrolls output above the prompt) and never
# invalidates the app, so the just-cleared input area can
# keep showing the submitted text until some unrelated
# redraw fires. Every other early-return branch in this
# handler invalidates after reset — match them.
event.app.invalidate()
return
# Handle /steer while the agent is running immediately on the
@ -12939,6 +12966,13 @@ class HermesCLI:
if self._should_handle_steer_command_inline(text, has_images=has_images):
self.process_command(text)
event.app.current_buffer.reset(append_to_history=True)
# Force a repaint after clearing the buffer. /steer is
# dispatched mid-run while the agent streams output through
# patch_stdout; process_command() never invalidates the
# app, so without this the submitted "/steer <text>" can
# linger in the input area (looking unsent) and invite an
# accidental re-submit. See issue #34569.
event.app.invalidate()
return
# Snapshot and clear attached images

View file

@ -474,6 +474,13 @@ class GatewayConfig:
# Delivery settings
always_log_local: bool = True # Always save cron outputs to local files
# Drop outbound "silence narration" messages (e.g. *(silent)*, 🔇, a bare
# ".") pre-send. These are model hallucinations emitted when a persona has
# nothing actionable to say; in bot-to-bot channels they mirror back and
# forth, burning tokens and crashing models. Substrate-level guard that
# survives SOUL.md/prompt drift across providers. Opt out with False for
# raw passthrough.
filter_silence_narration: bool = True
# STT settings
stt_enabled: bool = True # Whether to auto-transcribe inbound voice messages
@ -582,6 +589,7 @@ class GatewayConfig:
"quick_commands": self.quick_commands,
"sessions_dir": str(self.sessions_dir),
"always_log_local": self.always_log_local,
"filter_silence_narration": self.filter_silence_narration,
"stt_enabled": self.stt_enabled,
"group_sessions_per_user": self.group_sessions_per_user,
"thread_sessions_per_user": self.thread_sessions_per_user,
@ -650,6 +658,9 @@ class GatewayConfig:
quick_commands=quick_commands,
sessions_dir=sessions_dir,
always_log_local=_coerce_bool(data.get("always_log_local"), True),
filter_silence_narration=_coerce_bool(
data.get("filter_silence_narration"), True
),
stt_enabled=_coerce_bool(stt_enabled, True),
group_sessions_per_user=_coerce_bool(group_sessions_per_user, True),
thread_sessions_per_user=_coerce_bool(thread_sessions_per_user, False),
@ -757,21 +768,32 @@ def load_gateway_config() -> GatewayConfig:
if "always_log_local" in yaml_cfg:
gw_data["always_log_local"] = yaml_cfg["always_log_local"]
if "filter_silence_narration" in yaml_cfg:
gw_data["filter_silence_narration"] = yaml_cfg[
"filter_silence_narration"
]
if "unauthorized_dm_behavior" in yaml_cfg:
gw_data["unauthorized_dm_behavior"] = _normalize_unauthorized_dm_behavior(
yaml_cfg.get("unauthorized_dm_behavior"),
"pair",
)
# Merge platforms section from config.yaml into gw_data so that
# nested keys like platforms.webhook.extra.routes are loaded.
yaml_platforms = yaml_cfg.get("platforms")
# Merge platform config into gw_data so runtime-only settings under
# ``gateway.platforms`` are loaded the same way as top-level
# ``platforms``. Merge nested first so top-level config keeps
# precedence, matching the existing gateway.streaming fallback.
gateway_cfg = yaml_cfg.get("gateway")
gateway_platforms = gateway_cfg.get("platforms") if isinstance(gateway_cfg, dict) else None
platforms_data = gw_data.setdefault("platforms", {})
if not isinstance(platforms_data, dict):
platforms_data = {}
gw_data["platforms"] = platforms_data
if isinstance(yaml_platforms, dict):
for plat_name, plat_block in yaml_platforms.items():
def _merge_platform_map(source_platforms: Any) -> None:
if not isinstance(source_platforms, dict):
return
for plat_name, plat_block in source_platforms.items():
if not isinstance(plat_block, dict):
continue
existing = platforms_data.get(plat_name, {})
@ -785,6 +807,10 @@ def load_gateway_config() -> GatewayConfig:
if merged_extra:
merged["extra"] = merged_extra
platforms_data[plat_name] = merged
_merge_platform_map(gateway_platforms)
_merge_platform_map(yaml_cfg.get("platforms"))
if platforms_data:
gw_data["platforms"] = platforms_data
# Iterate built-in platforms plus any registered plugin platforms
# so plugin authors get the same shared-key bridging (#24836).
@ -890,6 +916,18 @@ def load_gateway_config() -> GatewayConfig:
if entry.apply_yaml_config_fn is None:
continue
platform_cfg = yaml_cfg.get(entry.name)
# Fall back to the platform's block under ``platforms`` /
# ``gateway.platforms`` so adapter hooks still run when the
# user configured the platform only under those nested paths
# (e.g. ``platforms.discord.extra.allow_from``) and not via a
# top-level ``discord:`` block.
if not isinstance(platform_cfg, dict):
for _src in (gateway_platforms, yaml_cfg.get("platforms")):
if isinstance(_src, dict):
_candidate = _src.get(entry.name)
if isinstance(_candidate, dict):
platform_cfg = _candidate
break
if not isinstance(platform_cfg, dict):
continue
try:

View file

@ -9,6 +9,8 @@ Routes messages to the appropriate destination based on:
"""
import logging
import os
import re
from pathlib import Path
from datetime import datetime
from dataclasses import dataclass
@ -21,6 +23,32 @@ logger = logging.getLogger(__name__)
MAX_PLATFORM_OUTPUT = 4000
TRUNCATED_VISIBLE = 3800
# Matches strings that are *only* a "silence" narration with optional markdown
# wrappers. Covers: *(silent)*, _silent_, `silent`, ~silent~, (silent), silent,
# 🔇, a bare ".", "…", and the whitespace/marker-padded variants seen in the
# wild. Anchored to start/end so substantive messages that merely *contain* the
# word "silent" are never matched.
_SILENCE_NARRATION = re.compile(
r'^[\s*_~`]*\(?\s*(silent|silence|no\s+response|no\s+reply)\s*\.?\)?[\s*_~`]*$'
r'|^[\s*_~`]*[\U0001F507\.\u2026]+[\s*_~`]*$',
re.IGNORECASE,
)
def _is_silence_narration(content: Optional[str]) -> bool:
"""Return True when ``content`` is *only* a silence-narration token.
Length-guarded (real messages are longer) and anchored to the whole string
so legitimate prose like "The deployment ran silently" or "Silence is
golden here is the plan..." is never flagged.
"""
if not content:
return False
stripped = content.strip()
if not stripped or len(stripped) > 64: # length guard
return False
return bool(_SILENCE_NARRATION.match(stripped))
from .config import Platform, GatewayConfig
from .session import SessionSource
@ -261,6 +289,18 @@ class DeliveryRouter:
path.write_text(content)
return path
def _filter_silence_narration_enabled(self) -> bool:
"""Whether the outbound silence-narration filter is active.
``HERMES_FILTER_SILENCE_NARRATION`` env var overrides config when set;
otherwise the ``gateway.filter_silence_narration`` config flag wins
(default True).
"""
env = os.getenv("HERMES_FILTER_SILENCE_NARRATION")
if env is not None:
return env.strip().lower() in ("1", "true", "yes", "on")
return bool(getattr(self.config, "filter_silence_narration", True))
async def _deliver_to_platform(
self,
target: DeliveryTarget,
@ -286,6 +326,27 @@ class DeliveryRouter:
+ f"\n\n... [truncated, full output saved to {saved_path}]"
)
# Substrate-level anti-loop guard: drop hallucinated "silence narration"
# (*(silent)*, 🔇, a bare ".", etc.) before it ever reaches the adapter.
# In bot-to-bot channels these tokens mirror back and forth until a
# model crashes with "no content after all retries". Behavioral prompt
# rules drift across providers; this single chokepoint covers every
# platform adapter regardless of which persona's prompt failed.
# Local/file delivery (_deliver_local) is a separate path and is never
# filtered — saved silence has no loop risk.
if self._filter_silence_narration_enabled() and _is_silence_narration(content):
logger.warning(
"Dropped silence-narration outbound to %s (chat=%s): %r",
target.platform.value,
target.chat_id,
content[:40],
)
return {
"success": True,
"filtered": "silence_narration",
"delivered": False,
}
send_metadata = dict(metadata or {})
is_named_telegram_private_topic = False
named_telegram_private_topic_name: Optional[str] = None

View file

@ -1191,10 +1191,12 @@ _MEDIA_EXT_ALTERNATION = "|".join(
# bare-path detector (extract_local_files) downstream rather than silently
# deleted. Shared by the non-streaming dispatch path and the streaming
# consumer so both behave identically.
# Path anchors: ``~/`` (Unix home-relative), ``/`` (Unix absolute),
# ``X:\\`` or ``X:/`` (Windows drive-letter absolute — #34632).
MEDIA_TAG_CLEANUP_RE = re.compile(
r'''[`"']?MEDIA:\s*'''
r'''(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|'''
r'''(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:''' + _MEDIA_EXT_ALTERNATION + r'''))'''
r'''(?:~/|/|[A-Za-z]:[/\\])\S+(?:[^\S\n]+\S+)*?\.(?:''' + _MEDIA_EXT_ALTERNATION + r'''))'''
r'''(?=[\s`"',;:)\]}]|$)[`"']?''',
re.IGNORECASE,
)
@ -2665,9 +2667,10 @@ class BasePlatformAdapter(ABC):
# (?<![/:\w.]) prevents matching inside URLs (e.g. https://…/img.png)
# and relative paths (./foo.png)
# (?:~/|/) anchors to absolute or home-relative paths
# (?:~/|/) anchors to absolute or home-relative Unix paths
# (?:[A-Za-z]:[/\\]) anchors to Windows drive-letter paths (#34632)
path_re = re.compile(
r'(?<![/:\w.])(?:~/|/)(?:[\w.\-]+/)*[\w.\-]+\.(?:' + ext_part + r')\b',
r'(?<![/:\w.])(?:~/|/|[A-Za-z]:[/\\])(?:[\w.\-]+[/\\])*[\w.\-]+\.(?:' + ext_part + r')\b',
re.IGNORECASE,
)

View file

@ -2804,21 +2804,8 @@ class TelegramAdapter(BasePlatformAdapter):
return slug
try:
# Build provider buttons — 2 per row
buttons: list = []
for p in providers:
count = p.get("total_models", len(p.get("models", [])))
label = f"{p['name']} ({count})"
if p.get("is_current"):
label = f"{label}"
# Compact callback data: mp:<slug> (max 64 bytes)
buttons.append(
InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
)
rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
keyboard = InlineKeyboardMarkup(rows)
# Build provider buttons — folds provider groups (display only).
keyboard = self._build_provider_keyboard(providers)
provider_label = get_label(current_provider)
text = self.format_message(
@ -2865,6 +2852,56 @@ class TelegramAdapter(BasePlatformAdapter):
_MODEL_PAGE_SIZE = 8
def _build_provider_keyboard(self, providers: list):
    """Assemble the top-level provider picker keyboard with groups folded.

    If ``hermes_cli.models.group_providers`` can be imported, it is called
    with the provider slugs and each returned row becomes one button:
    rows of kind ``"group"`` yield a single ``mpg:<group_id>`` button whose
    count is the sum over the members present in ``providers``; any other
    row yields a direct ``mp:<slug>`` button (skipped when the slug is
    unknown). If the import fails, every provider gets a plain
    ``mp:<slug>`` button.

    Buttons are laid out two per row, followed by a Cancel row.

    Args:
        providers: Provider dicts; the code reads ``slug``, ``name``,
            ``total_models`` / ``models`` and ``is_current``.

    Returns:
        An ``InlineKeyboardMarkup`` built from the rows.
    """
    try:
        from hermes_cli.models import group_providers
    except Exception:
        group_providers = None

    lookup = {entry.get("slug"): entry for entry in providers}

    def _model_count(entry):
        # Prefer the explicit total; otherwise count the listed models.
        return entry.get("total_models", len(entry.get("models", [])))

    def _provider_button(entry):
        count = _model_count(entry)
        text = f"{entry['name']} ({count})"
        if entry.get("is_current"):
            # NOTE(review): this branch leaves the label unchanged as
            # written — a current-selection marker may be missing; confirm.
            text = f"{text}"
        return InlineKeyboardButton(text, callback_data=f"mp:{entry['slug']}")

    buttons: list = []
    if group_providers is None:
        buttons.extend(_provider_button(entry) for entry in providers)
    else:
        for row in group_providers([entry.get("slug") for entry in providers]):
            if row["kind"] != "group":
                entry = lookup.get(row["slug"])
                if entry is not None:
                    buttons.append(_provider_button(entry))
                continue
            members = [lookup[slug] for slug in row["members"] if slug in lookup]
            count = sum(_model_count(member) for member in members)
            text = f"{row['label']} ▸ ({count})"
            if any(member.get("is_current") for member in members):
                # NOTE(review): same no-op as above — confirm intended marker.
                text = f"{text}"
            buttons.append(
                InlineKeyboardButton(text, callback_data=f"mpg:{row['group_id']}")
            )

    # Two buttons per row, then a trailing Cancel row.
    rows = [buttons[start : start + 2] for start in range(0, len(buttons), 2)]
    rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
    return InlineKeyboardMarkup(rows)
def _build_model_keyboard(self, models: list, page: int) -> tuple:
"""Build paginated model buttons. Returns (keyboard, page_info_text)."""
page_size = self._MODEL_PAGE_SIZE
@ -3043,10 +3080,23 @@ class TelegramAdapter(BasePlatformAdapter):
# Clean up state
self._model_picker_state.pop(chat_id, None)
elif data == "mb":
# --- Back to provider list ---
elif data.startswith("mpg:"):
# --- Provider group selected: show member providers ---
group_id = data[4:]
try:
from hermes_cli.models import PROVIDER_GROUPS
_label, member_slugs = PROVIDER_GROUPS.get(group_id, ("", []))
except Exception:
_label, member_slugs = "", []
by_slug = {p["slug"]: p for p in state["providers"]}
members = [by_slug[m] for m in member_slugs if m in by_slug]
if not members:
await query.answer(text="Group not found.")
return
buttons = []
for p in state["providers"]:
for p in members:
count = p.get("total_models", len(p.get("models", [])))
label = f"{p['name']} ({count})"
if p.get("is_current"):
@ -3054,11 +3104,30 @@ class TelegramAdapter(BasePlatformAdapter):
buttons.append(
InlineKeyboardButton(label, callback_data=f"mp:{p['slug']}")
)
rows = [buttons[i : i + 2] for i in range(0, len(buttons), 2)]
rows.append([InlineKeyboardButton("✗ Cancel", callback_data="mx")])
rows.append([
InlineKeyboardButton("◀ Back", callback_data="mb"),
InlineKeyboardButton("✗ Cancel", callback_data="mx"),
])
keyboard = InlineKeyboardMarkup(rows)
await query.edit_message_text(
text=self.format_message(
(
f"⚙ *Model Configuration*\n\n"
f"Provider family: *{_label or group_id}*\n\n"
f"Select a provider:"
)
),
parse_mode=ParseMode.MARKDOWN_V2,
reply_markup=keyboard,
)
await query.answer()
elif data == "mb":
# --- Back to provider list (folds groups) ---
keyboard = self._build_provider_keyboard(state["providers"])
try:
provider_label = get_label(state["current_provider"])
except Exception:
@ -3107,7 +3176,7 @@ class TelegramAdapter(BasePlatformAdapter):
query_user_name = getattr(query.from_user, "first_name", None)
# --- Model picker callbacks ---
if data.startswith(("mp:", "mm:", "mb", "mx", "mg:")):
if data.startswith(("mp:", "mpg:", "mm:", "mb", "mx", "mg:")):
chat_id = str(query.message.chat_id) if query.message else None
if chat_id:
await self._handle_model_picker_callback(query, data, chat_id)

View file

@ -1180,12 +1180,48 @@ class WeixinAdapter(BasePlatformAdapter):
default=False,
)
# Text debounce batching (mirrors Telegram adapter pattern).
# iLink delivers messages individually, so rapid multi-message
# bursts (forwarded batches, paste-splits) each trigger a
# separate agent invocation. Default 3s delay / 5s split delay
# are tuned for iLink's typical delivery cadence. Tunable via
# config.yaml under
# ``gateway.platforms.weixin.extra.text_batch_delay_seconds`` /
# ``text_batch_split_delay_seconds``.
self._text_batch_delay_seconds = self._coerce_float_extra(
"text_batch_delay_seconds", 3.0
)
self._text_batch_split_delay_seconds = self._coerce_float_extra(
"text_batch_split_delay_seconds", 5.0
)
self._pending_text_batches: Dict[str, MessageEvent] = {}
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
if self._account_id and not self._token:
persisted = load_weixin_account(hermes_home, self._account_id)
if persisted:
self._token = str(persisted.get("token") or "").strip()
self._base_url = str(persisted.get("base_url") or self._base_url).strip().rstrip("/")
def _coerce_float_extra(self, key: str, default: float) -> float:
"""Read a float from ``config.extra``, guarding against bad/non-finite values.
The result is fed directly to ``asyncio.sleep()``, so NaN/Inf and
unparseable values fall back to ``default``.
"""
import math
value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
if value is None:
return float(default)
try:
parsed = float(value)
except (TypeError, ValueError):
return float(default)
if not math.isfinite(parsed) or parsed < 0:
return float(default)
return parsed
@staticmethod
def _coerce_list(value: Any) -> List[str]:
if value is None:
@ -1247,6 +1283,11 @@ class WeixinAdapter(BasePlatformAdapter):
async def disconnect(self) -> None:
_LIVE_ADAPTERS.pop(self._token, None)
self._running = False
for task in self._pending_text_batch_tasks.values():
if not task.done():
task.cancel()
self._pending_text_batches.clear()
self._pending_text_batch_tasks.clear()
if self._poll_task and not self._poll_task.done():
self._poll_task.cancel()
try:
@ -1395,12 +1436,10 @@ class WeixinAdapter(BasePlatformAdapter):
timestamp=datetime.now(),
)
logger.info("[%s] inbound from=%s type=%s media=%d", self.name, _safe_id(sender_id), source.chat_type, len(media_paths))
await self.handle_message(event)
@property
def enforces_own_access_policy(self) -> bool:
"""Weixin gates DM/group access at intake via dm_policy/group_policy."""
return True
if event.message_type == MessageType.TEXT:
self._enqueue_text_event(event)
else:
await self.handle_message(event)
def _is_dm_allowed(self, sender_id: str) -> bool:
if self._dm_policy == "disabled":
@ -1409,6 +1448,76 @@ class WeixinAdapter(BasePlatformAdapter):
return sender_id in self._allow_from
return True
@property
def enforces_own_access_policy(self) -> bool:
    """Weixin gates DM/group access at intake via dm_policy/group_policy.

    This property unconditionally reports ``True``.
    """
    return True
# ------------------------------------------------------------------
# Text debounce batching
# ------------------------------------------------------------------
_SPLIT_THRESHOLD = 1800 # iLink chunks at ~2048 chars
def _text_batch_key(self, event: MessageEvent) -> str:
    """Compute the session key under which ``event``'s text is batched.

    Delegates to ``gateway.session.build_session_key`` with the event's
    source and the two per-user session flags read from ``config.extra``
    (``group_sessions_per_user`` defaults to ``True``,
    ``thread_sessions_per_user`` to ``False``).
    """
    from gateway.session import build_session_key

    source = event.source
    per_user_flags = {
        "group_sessions_per_user": self.config.extra.get("group_sessions_per_user", True),
        "thread_sessions_per_user": self.config.extra.get("thread_sessions_per_user", False),
    }
    return build_session_key(source, **per_user_flags)
def _enqueue_text_event(self, event: MessageEvent) -> None:
"""Buffer a text event and reset the flush timer.
When users forward multiple messages or send rapid-fire texts
via WeChat, each arrives as a separate iLink message. This
concatenates them and waits for a short quiet period before
dispatching the combined message.
"""
key = self._text_batch_key(event)
existing = self._pending_text_batches.get(key)
chunk_len = len(event.text or "")
if existing is None:
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
self._pending_text_batches[key] = event
else:
if event.text:
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
if event.media_urls:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
prior_task = self._pending_text_batch_tasks.get(key)
if prior_task and not prior_task.done():
prior_task.cancel()
self._pending_text_batch_tasks[key] = asyncio.create_task(
self._flush_text_batch(key)
)
async def _flush_text_batch(self, key: str) -> None:
"""Wait for quiet period then dispatch aggregated text."""
current_task = asyncio.current_task()
try:
pending = self._pending_text_batches.get(key)
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
if last_len >= self._SPLIT_THRESHOLD:
delay = self._text_batch_split_delay_seconds
else:
delay = self._text_batch_delay_seconds
await asyncio.sleep(delay)
if self._pending_text_batch_tasks.get(key) is not current_task:
return
event = self._pending_text_batches.pop(key, None)
if not event:
return
await self.handle_message(event)
finally:
if self._pending_text_batch_tasks.get(key) is current_task:
self._pending_text_batch_tasks.pop(key, None)
async def _collect_media(self, item: Dict[str, Any], media_paths: List[str], media_types: List[str]) -> None:
item_type = item.get("type")
if item_type == ITEM_IMAGE:

View file

@ -278,6 +278,43 @@ class WhatsAppAdapter(BasePlatformAdapter):
# notification before the normal "✓ whatsapp disconnected" fires.
self._shutting_down: bool = False
# Text debounce batching (mirrors Telegram adapter pattern).
# WhatsApp often delivers multiple messages in rapid succession
# (e.g. forwarded batches, paste-splits) — without debounce each
# message triggers a separate agent invocation, wasting tokens and
# flooding the user with reply fragments. Default 5s delay /
# 10s split delay are conservative for WhatsApp's delivery cadence.
# Tunable via config.yaml under
# ``gateway.platforms.whatsapp.extra.text_batch_delay_seconds`` /
# ``text_batch_split_delay_seconds``.
self._text_batch_delay_seconds = self._coerce_float_extra(
"text_batch_delay_seconds", 5.0
)
self._text_batch_split_delay_seconds = self._coerce_float_extra(
"text_batch_split_delay_seconds", 10.0
)
self._pending_text_batches: Dict[str, MessageEvent] = {}
self._pending_text_batch_tasks: Dict[str, asyncio.Task] = {}
def _coerce_float_extra(self, key: str, default: float) -> float:
"""Read a float from ``config.extra``, guarding against bad/non-finite values.
The result is fed directly to ``asyncio.sleep()``, so NaN/Inf and
unparseable values fall back to ``default``.
"""
import math
value = self.config.extra.get(key) if getattr(self.config, "extra", None) else None
if value is None:
return float(default)
try:
parsed = float(value)
except (TypeError, ValueError):
return float(default)
if not math.isfinite(parsed) or parsed < 0:
return float(default)
return parsed
def _effective_reply_prefix(self) -> str:
"""Return the prefix the Node bridge will add in self-chat mode."""
whatsapp_mode = os.getenv("WHATSAPP_MODE", "self-chat")
@ -1139,7 +1176,10 @@ class WhatsAppAdapter(BasePlatformAdapter):
for msg_data in messages:
event = await self._build_message_event(msg_data)
if event:
await self.handle_message(event)
if event.message_type == MessageType.TEXT:
self._enqueue_text_event(event)
else:
await self.handle_message(event)
except asyncio.CancelledError:
break
except Exception as e:
@ -1151,7 +1191,67 @@ class WhatsAppAdapter(BasePlatformAdapter):
await asyncio.sleep(5)
await asyncio.sleep(1) # Poll interval
# ── Text debounce batching ──────────────────────────────────────
_SPLIT_THRESHOLD = 6000 # WhatsApp supports ~65K chars; generous threshold
def _text_batch_key(self, event: MessageEvent) -> str:
    """Return the session-scoped key used to group ``event`` with its batch.

    The key comes from ``gateway.session.build_session_key``, given the
    event's source plus the ``group_sessions_per_user`` (default ``True``)
    and ``thread_sessions_per_user`` (default ``False``) settings taken
    from ``config.extra``.
    """
    from gateway.session import build_session_key

    origin = event.source
    group_per_user = self.config.extra.get("group_sessions_per_user", True)
    thread_per_user = self.config.extra.get("thread_sessions_per_user", False)
    return build_session_key(
        origin,
        group_sessions_per_user=group_per_user,
        thread_sessions_per_user=thread_per_user,
    )
def _enqueue_text_event(self, event: MessageEvent) -> None:
"""Buffer a text event and reset the flush timer.
When WhatsApp delivers rapid-fire messages (e.g. forwarded
batches), this concatenates them and waits for a short quiet
period before dispatching the combined message.
"""
key = self._text_batch_key(event)
existing = self._pending_text_batches.get(key)
chunk_len = len(event.text or "")
if existing is None:
event._last_chunk_len = chunk_len # type: ignore[attr-defined]
self._pending_text_batches[key] = event
else:
if event.text:
existing.text = f"{existing.text}\n{event.text}" if existing.text else event.text
existing._last_chunk_len = chunk_len # type: ignore[attr-defined]
if event.media_urls:
existing.media_urls.extend(event.media_urls)
existing.media_types.extend(event.media_types)
prior_task = self._pending_text_batch_tasks.get(key)
if prior_task and not prior_task.done():
prior_task.cancel()
self._pending_text_batch_tasks[key] = asyncio.create_task(
self._flush_text_batch(key)
)
async def _flush_text_batch(self, key: str) -> None:
"""Wait for quiet period then dispatch aggregated text."""
current_task = asyncio.current_task()
try:
pending = self._pending_text_batches.get(key)
last_len = getattr(pending, "_last_chunk_len", 0) if pending else 0
if last_len >= self._SPLIT_THRESHOLD:
delay = self._text_batch_split_delay_seconds
else:
delay = self._text_batch_delay_seconds
await asyncio.sleep(delay)
event = self._pending_text_batches.pop(key, None)
if not event:
return
await self.handle_message(event)
finally:
if self._pending_text_batch_tasks.get(key) is current_task:
self._pending_text_batch_tasks.pop(key, None)
async def _build_message_event(self, data: Dict[str, Any]) -> Optional[MessageEvent]:
"""Build a MessageEvent from bridge message data, downloading images to cache."""
try:

View file

@ -1730,6 +1730,14 @@ class GatewayRunner:
self._running_agents: Dict[str, Any] = {}
self._running_agents_ts: Dict[str, float] = {} # start timestamp per session
self._pending_messages: Dict[str, str] = {} # Queued messages during interrupt
# Last successfully-resolved (non-empty) model, keyed by session. Used
# as a fallback when a fresh config read transiently returns an empty
# model (e.g. an mtime-keyed config-cache miss during a post-interrupt
# recovery turn). Without this, the agent is built with model="" and
# every API call fails HTTP 400 "No models provided" — the session goes
# silent until the user manually re-sends. See #35314. ``"*"`` holds a
# process-wide last-known-good for sessions seen for the first time.
self._last_resolved_model: Dict[str, str] = {}
# Overflow buffer for explicit /queue commands. The adapter-level
# _pending_messages dict is a single slot per session (designed for
# "next-turn" follow-ups where repeated sends collapse into one
@ -2488,6 +2496,32 @@ class GatewayRunner:
except Exception:
pass
# Final safety net (#35314): if resolution still produced an empty
# model — e.g. a transient config-cache miss during a post-interrupt
# recovery turn returned an empty user_config — reuse the last model we
# successfully resolved for this session (or, failing that, the most
# recent one resolved process-wide). Building an agent with model=""
# makes every API call fail HTTP 400 "No models provided" and the
# session goes silent until the user manually re-sends. ``getattr``
# guards against bare test runners built via ``object.__new__``.
_last_good = getattr(self, "_last_resolved_model", None)
if _last_good is not None:
if not model:
_recovered = _last_good.get(resolved_session_key or "") or _last_good.get("*")
if _recovered:
logger.warning(
"Empty model resolved for session=%s — recovering "
"last-known-good model %s (config read likely returned "
"empty; see #35314)",
resolved_session_key or "", _recovered,
)
model = _recovered
elif model:
# Cache the good resolution for future recovery turns.
if resolved_session_key:
_last_good[resolved_session_key] = model
_last_good["*"] = model
return model, runtime_kwargs
def _resolve_turn_agent_config(self, user_message: str, model: str, runtime_kwargs: dict) -> dict:
@ -2784,10 +2818,12 @@ class GatewayRunner:
"""Mark a queued platform as paused — keep it in ``_failed_platforms``
but stop the reconnect watcher from hammering it.
Used by the circuit breaker after ``_PAUSE_AFTER_FAILURES`` consecutive
retryable failures, and by ``/platform pause <name>`` for manual
intervention. Paused platforms are surfaced in ``/platform list``
and resumed with ``/platform resume <name>``.
Used by ``/platform pause <name>`` for manual operator intervention.
Paused platforms are surfaced in ``/platform list`` and resumed with
``/platform resume <name>``. Note: the reconnect watcher does NOT
auto-pause — retryable (network/DNS) failures keep retrying at the
backoff cap indefinitely so a transient outage self-heals without
manual intervention.
"""
info = getattr(self, "_failed_platforms", {}).get(platform)
if info is None:
@ -5865,15 +5901,17 @@ class GatewayRunner:
"""Background task that periodically retries connecting failed platforms.
Uses exponential backoff: 30s → 60s → 120s → 240s → 300s (cap).
Retryable failures keep retrying at the backoff cap indefinitely
but if a platform fails ``_PAUSE_AFTER_FAILURES`` times in a row
without ever succeeding, it is *paused*: kept in the retry queue
but no longer hammered. The user surfaces it with ``/platform list``
and resumes it with ``/platform resume <name>``. Non-retryable
failures (bad auth, etc.) still drop out of the queue immediately.
Retryable failures (network/DNS blips) keep retrying at the backoff
cap indefinitely — they self-heal once connectivity returns, so a
transient outage never requires manual intervention. Non-retryable
failures (bad auth, etc.) drop out of the queue immediately. The
circuit breaker (``_pause_failed_platform`` / ``/platform pause``)
remains available for manual operator control via ``/platform list``
and ``/platform resume <name>``, but is no longer triggered
automatically — auto-pausing a recovered platform was the cause of
bots silently staying dead after a transient DNS failure.
"""
_BACKOFF_CAP = 300 # 5 minutes max between retries
_PAUSE_AFTER_FAILURES = 10 # circuit-breaker threshold
await asyncio.sleep(10) # initial delay — let startup finish
while self._running:
@ -5968,14 +6006,14 @@ class GatewayRunner:
"Reconnect %s failed, next retry in %ds",
platform.value, backoff,
)
if attempt >= _PAUSE_AFTER_FAILURES:
self._pause_failed_platform(
platform,
reason=(
adapter.fatal_error_message
or "failed to reconnect"
),
)
# Retryable failures (network/DNS blips) keep retrying
# at the backoff cap indefinitely — they self-heal once
# connectivity returns. We do NOT auto-pause them: a
# transient outage must never require manual `/platform
# resume` to recover. Non-retryable failures (bad auth,
# etc.) already drop out of the queue via the
# `not fatal_error_retryable` branch above, so anything
# reaching here is by definition retryable.
except Exception as e:
self._update_platform_runtime_status(
platform.value,
@ -5990,8 +6028,9 @@ class GatewayRunner:
"Reconnect %s error: %s, next retry in %ds",
platform.value, e, backoff,
)
if attempt >= _PAUSE_AFTER_FAILURES:
self._pause_failed_platform(platform, reason=str(e))
# A raised exception during reconnect (connect timeout, DNS
# resolution failure, etc.) is inherently transient — keep
# retrying at the backoff cap rather than auto-pausing.
# Check every 10 seconds for platforms that need reconnection
for _ in range(10):
@ -10531,6 +10570,22 @@ class GatewayRunner:
except Exception as exc:
logger.warning("Picker model switch failed for cached agent: %s", exc)
# Persist the new model to the session DB so the
# dashboard shows the updated model (#34850).
_sess_db = getattr(_self, "_session_db", None)
if _sess_db is not None:
try:
_sess_entry = _self.session_store.get_or_create_session(
event.source
)
_sess_db.update_session_model(
_sess_entry.session_id, result.new_model
)
except Exception as exc:
logger.debug(
"Failed to persist model switch to DB: %s", exc
)
# Store model note + session override
if not hasattr(_self, "_pending_model_notes"):
_self._pending_model_notes = {}
@ -10668,6 +10723,20 @@ class GatewayRunner:
except Exception as exc:
logger.warning("In-place model switch failed for cached agent: %s", exc)
# Persist the new model to the session DB so the dashboard
# shows the updated model (#34850).
_sess_db = getattr(self, "_session_db", None)
if _sess_db is not None:
try:
_sess_entry = self.session_store.get_or_create_session(source)
_sess_db.update_session_model(
_sess_entry.session_id, result.new_model
)
except Exception as exc:
logger.debug(
"Failed to persist model switch to DB: %s", exc
)
# Store a note to prepend to the next user message so the model
# knows about the switch (avoids system messages mid-history).
if not hasattr(self, "_pending_model_notes"):
@ -15313,8 +15382,52 @@ class GatewayRunner:
("compression", "target_ratio"),
("compression", "protect_last_n"),
("agent", "disabled_toolsets"),
("memory", "provider"),
)
# Names of the Honcho identity settings that take part in cached-agent
# invalidation. Both the "empty" placeholder mapping and the extracted
# mapping use exactly these keys.
_HONCHO_CACHE_BUSTING_KEYS = (
"honcho.peer_name",
"honcho.ai_peer",
"honcho.pin_peer_name",
"honcho.runtime_peer_prefix",
"honcho.user_peer_aliases",
)
# Memo of the most recently extracted values, keyed by
# ``(config path as str, st_mtime_ns or None when the file can't be stat'ed)``.
# The extractor replaces the whole dict on each miss, so it holds one entry.
_HONCHO_CACHE_BUSTING_MEMO: dict[tuple[str, int | None], dict[str, Any]] = {}
@classmethod
def _empty_honcho_cache_busting_config(cls) -> dict[str, Any]:
    """Return a fresh mapping of every Honcho cache-busting key to ``None``.

    The key set comes from ``cls._HONCHO_CACHE_BUSTING_KEYS``; a new dict is
    built on every call so callers may mutate the result freely.
    """
    return dict.fromkeys(cls._HONCHO_CACHE_BUSTING_KEYS)
@classmethod
def _extract_honcho_cache_busting_config(cls) -> dict[str, Any]:
    """Read the Honcho identity keys from honcho.json, memoized by file mtime.

    The memo key is ``(str(path), st_mtime_ns)``; when the file cannot be
    stat'ed the mtime component is ``None``. A hit returns a shallow copy of
    the memoized mapping. A miss re-reads the config, replaces the memo with
    a single entry for the current key, and returns a shallow copy.

    Any exception (including a failed plugin import) yields the all-``None``
    mapping from ``_empty_honcho_cache_busting_config`` instead of raising.
    """
    try:
        from plugins.memory.honcho.client import HonchoClientConfig, resolve_config_path

        config_path = resolve_config_path()
        try:
            stamp = config_path.stat().st_mtime_ns
        except OSError:
            # Missing/unreadable file: still memoize, under a None mtime.
            stamp = None

        key = (str(config_path), stamp)
        hit = cls._HONCHO_CACHE_BUSTING_MEMO.get(key)
        if hit is not None:
            return dict(hit)

        honcho_cfg = HonchoClientConfig.from_global_config(config_path=config_path)
        alias_map = honcho_cfg.user_peer_aliases or {}
        snapshot = {
            "honcho.peer_name": honcho_cfg.peer_name,
            "honcho.ai_peer": honcho_cfg.ai_peer,
            "honcho.pin_peer_name": bool(honcho_cfg.pin_peer_name),
            "honcho.runtime_peer_prefix": honcho_cfg.runtime_peer_prefix or "",
            # Sorted item pairs give a stable value for comparison; a
            # non-dict aliases value is treated as "no aliases".
            "honcho.user_peer_aliases": (
                sorted(alias_map.items()) if isinstance(alias_map, dict) else []
            ),
        }
        # Replace (not update) the memo so only the latest key is retained.
        cls._HONCHO_CACHE_BUSTING_MEMO = {key: snapshot}
        return dict(snapshot)
    except Exception:
        return cls._empty_honcho_cache_busting_config()
@classmethod
def _extract_cache_busting_config(cls, user_config: dict | None) -> dict:
"""Pull values that must bust the cached agent.
@ -15345,26 +15458,12 @@ class GatewayRunner:
out["tools.registry_generation"] = None
# Honcho identity-mapping keys live in honcho.json, not user_config.
# HonchoSessionManager freezes the resolved peer_name / ai_peer /
# pin / aliases / prefix at construction; without busting here,
# mid-flight honcho.json edits go unread until the next unrelated
# cache eviction.
try:
from plugins.memory.honcho.client import HonchoClientConfig
hcfg = HonchoClientConfig.from_global_config()
out["honcho.peer_name"] = hcfg.peer_name
out["honcho.ai_peer"] = hcfg.ai_peer
out["honcho.pin_peer_name"] = bool(hcfg.pin_peer_name)
out["honcho.runtime_peer_prefix"] = hcfg.runtime_peer_prefix or ""
aliases = hcfg.user_peer_aliases or {}
out["honcho.user_peer_aliases"] = sorted(aliases.items()) if isinstance(aliases, dict) else []
except Exception:
out["honcho.peer_name"] = None
out["honcho.ai_peer"] = None
out["honcho.pin_peer_name"] = None
out["honcho.runtime_peer_prefix"] = None
out["honcho.user_peer_aliases"] = None
# Only read that file when Honcho is the active memory provider.
provider = cfg_get(cfg, "memory", "provider")
if isinstance(provider, str) and provider.lower() == "honcho":
out.update(cls._extract_honcho_cache_busting_config())
else:
out.update(cls._empty_honcho_cache_busting_config())
return out
@ -17203,7 +17302,7 @@ class GatewayRunner:
_hc = _hm.get("content", "")
if "MEDIA:" in _hc:
_TOOL_MEDIA_RE = re.compile(
r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
r'txt|csv|apk|ipa))',
@ -17529,7 +17628,7 @@ class GatewayRunner:
content = msg.get("content", "")
if "MEDIA:" in content:
_TOOL_MEDIA_RE = re.compile(
r'MEDIA:((?:/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|jpe?g|gif|webp|'
r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
r'txt|csv|apk|ipa))',

View file

@ -12,14 +12,16 @@ import threading
import time
from pathlib import Path
from hermes_constants import get_hermes_home
from typing import Dict, List, Optional
from typing import TYPE_CHECKING, Dict, List, Optional
from rich.console import Console
from rich.panel import Panel
from rich.table import Table
from prompt_toolkit import print_formatted_text as _pt_print
from prompt_toolkit.formatted_text import ANSI as _PT_ANSI
# rich and prompt_toolkit are imported lazily (inside the functions that use
# them) rather than at module level. Importing this module is on the TUI
# gateway's critical startup path purely to reach the lightweight update-check
# helpers (``prefetch_update_check``); pulling rich.console + prompt_toolkit
# eagerly added ~50ms of wasted imports before ``gateway.ready`` could fire.
# Keep the type-only reference available to checkers without the runtime cost.
if TYPE_CHECKING:
from rich.console import Console
logger = logging.getLogger(__name__)
@ -36,6 +38,8 @@ _RST = "\033[0m"
def cprint(text: str):
    """Emit *text*, interpreting its ANSI escape codes, via prompt_toolkit.

    prompt_toolkit is imported inside the function rather than at module
    level so that merely importing this module does not pay for it.
    """
    from prompt_toolkit import print_formatted_text
    from prompt_toolkit.formatted_text import ANSI

    print_formatted_text(ANSI(text))
@ -471,7 +475,7 @@ def _display_toolset_name(toolset_name: str) -> str:
)
def build_welcome_banner(console: Console, model: str, cwd: str,
def build_welcome_banner(console: "Console", model: str, cwd: str,
tools: List[dict] = None,
enabled_toolsets: List[str] = None,
session_id: str = None,
@ -490,6 +494,8 @@ def build_welcome_banner(console: Console, model: str, cwd: str,
context_length: Model's context window size in tokens.
"""
from model_tools import check_tool_availability, TOOLSET_REQUIREMENTS
from rich.panel import Panel
from rich.table import Table
if get_toolset_for_tool is None:
from model_tools import get_toolset_for_tool

View file

@ -286,9 +286,22 @@ def detect_install_method(project_root: Optional[Path] = None) -> str:
Resolution order:
1. Stamped ``~/.hermes/.install_method`` file (written by installers)
2. HERMES_MANAGED env / .managed marker (NixOS, Homebrew)
3. Container detection (/.dockerenv, /run/.containerenv, cgroup)
4. .git directory presence -> 'git'
5. Fallback -> 'pip'
3. .git directory presence -> 'git'
4. Fallback -> 'pip'
Note: running inside a container is NOT treated as "docker" on its own.
The two supported install paths both self-identify via the
``.install_method`` stamp (caught by step 1), so neither relies on
container detection here:
- the curl installer (scripts/install.sh, the README/website install
command) git-clones the repo and stamps ``git``;
- the published ``nousresearch/hermes-agent`` image stamps ``docker``
at boot via ``docker/stage2-hook.sh``.
An unsupported manual install dropped into a container (no stamp) was
wrongly classified as the published image by bare container detection,
so ``hermes update`` bailed with "doesn't apply inside the Docker
container". Without that fallback such installs fall through to the
``.git``/pip checks and behave like any off-path install. See issue #34397.
"""
stamp = get_hermes_home() / ".install_method"
try:
@ -300,9 +313,6 @@ def detect_install_method(project_root: Optional[Path] = None) -> str:
managed = get_managed_system()
if managed:
return managed.lower().replace(" ", "-")
from hermes_constants import is_container
if is_container():
return "docker"
if project_root is None:
project_root = Path(__file__).parent.parent.resolve()
if (project_root / ".git").is_dir():
@ -320,6 +330,34 @@ def stamp_install_method(method: str) -> None:
pass
def is_uv_tool_install() -> bool:
    """Report whether the *running* Hermes sits inside a ``uv tool`` layout.

    ``uv tool install hermes-agent`` puts the install under
    ``.../uv/tools/hermes-agent/...`` (default ``~/.local/share/uv/tools``,
    or ``$UV_TOOL_DIR/...``). Those installs are not inside a virtualenv, so
    ``uv pip install`` fails with ``No virtual environment found`` and the
    update path has to use ``uv tool upgrade`` instead.

    Only the running interpreter is inspected (``sys.prefix`` first, then
    ``sys.executable``). ``uv tool list`` is deliberately not consulted: it
    would report True when ``hermes-agent`` is uv-tool-installed somewhere on
    the machine while the active Hermes is an ordinary pip/venv install
    (making ``hermes update`` upgrade the wrong copy), and it would block on
    a subprocess just to build a recommendation string.

    Returns:
        True when either path contains the ``/uv/tools/hermes-agent/``
        segment (case-insensitive, separator-normalized), else False.
    """
    marker = "/uv/tools/hermes-agent/"

    def _inside_uv_tool_dir(candidate: str) -> bool:
        # Normalize separators and case; the appended "/" lets a path that
        # ends exactly at ".../uv/tools/hermes-agent" match the marker too.
        unified = os.path.normpath(candidate).replace(os.sep, "/").lower()
        return marker in f"{unified}/"

    return any(
        _inside_uv_tool_dir(location)
        for location in (sys.prefix, sys.executable or "")
    )
def recommended_update_command_for_method(method: str) -> str:
"""Return the update command or guidance for a given install method."""
if method == "nixos":
@ -329,9 +367,10 @@ def recommended_update_command_for_method(method: str) -> str:
if method == "docker":
return "docker pull nousresearch/hermes-agent:latest"
if method == "pip":
if is_uv_tool_install():
return "uv tool upgrade hermes-agent"
import shutil
uv = shutil.which("uv")
if uv:
if shutil.which("uv"):
return "uv pip install --upgrade hermes-agent"
return "pip install --upgrade hermes-agent"
return "hermes update"
@ -1184,6 +1223,11 @@ DEFAULT_CONFIG = {
# Mirrors `hermes -c` muscle memory. Default off so existing
# users aren't surprised. HERMES_TUI_RESUME=<id> always wins.
"tui_auto_resume_recent": False,
# When true (default), `hermes --tui` drops a one-time hint
# ("subagents working · /agents to watch live") the first time a turn
# starts delegating, nudging the user toward the live spawn-tree
# dashboard. Set false to suppress the hint.
"tui_agents_nudge": True,
"bell_on_complete": False,
"show_reasoning": False,
"streaming": False,
@ -1203,6 +1247,13 @@ DEFAULT_CONFIG = {
# class of over-claim that otherwise forces users to run
# `git status` to verify edits landed. Set false to suppress.
"file_mutation_verifier": True,
# Turn-completion explainer. When true (default), the agent appends a
# one-line explanation to its final response whenever a turn ends
# abnormally with no usable reply — empty content after retries, a
# partial/truncated stream, a still-pending tool result, or an
# iteration/budget limit. Replaces the bare "(empty)" sentinel so the
# failure isn't silent from the UI's perspective. Set false to suppress.
"turn_completion_explainer": True,
"show_cost": False, # Show $ cost in the status bar (off by default)
"skin": "default",
# UI language for static user-facing messages (approval prompts, a

View file

@ -204,6 +204,60 @@ def _fail_and_issue(text: str, detail: str, fix: str, issues: list[str]) -> None
issues.append(fix)
def _read_pyproject_version() -> str | None:
"""Read the ``version = "..."`` from ``pyproject.toml`` at the project root.
Returns None when running from an installed wheel (no pyproject.toml ships
with the package) or when the file can't be parsed. Reads only the
``[project]`` version, ignoring any version strings that appear in other
tables.
"""
pyproject = PROJECT_ROOT / "pyproject.toml"
try:
text = pyproject.read_text(encoding="utf-8")
except OSError:
return None
in_project = False
for raw in text.splitlines():
line = raw.strip()
if line.startswith("[") and line.endswith("]"):
in_project = line == "[project]"
continue
if in_project and line.startswith("version") and "=" in line:
value = line.split("=", 1)[1]
value = value.split("#", 1)[0].strip().strip("\"'")
return value or None
return None
def _check_version_consistency(issues: list[str]) -> None:
    """Cross-check the pyproject.toml version against ``hermes_cli.__version__``.

    A git conflict resolution (reset/merge) can revert one of the two files
    without the other, so ``hermes --version`` may report a stale version
    while ``pyproject.toml`` is current. This check surfaces that drift.

    Args:
        issues: Running list of fix suggestions; passed through to
            ``_fail_and_issue`` when the versions disagree.

    The check is a silent no-op when ``hermes_cli`` cannot be imported or
    when no pyproject version is available (e.g. an installed wheel).
    """
    try:
        from hermes_cli import __version__ as init_version
    except Exception:
        return

    pyproject_version = _read_pyproject_version()
    if pyproject_version is None:
        # Installed wheel or unreadable pyproject: nothing to compare against.
        return

    if pyproject_version != init_version:
        _fail_and_issue(
            "Version mismatch between source files",
            f"(pyproject.toml {pyproject_version} != hermes_cli/__init__.py {init_version})",
            "Re-sync version files (e.g. run 'hermes update', or set "
            "hermes_cli/__init__.py __version__ to match pyproject.toml)",
            issues,
        )
        return

    check_ok("Version files consistent", f"({init_version})")
def _check_s6_supervision(issues: list[str]) -> None:
"""Inside a container under our s6 /init, surface what s6 sees.
@ -509,6 +563,10 @@ def run_doctor(args):
check_ok("Virtual environment active")
else:
check_warn("Not in virtual environment", "(recommended)")
# Detect drift between pyproject.toml and hermes_cli/__init__.py versions
# (a git conflict resolution can silently revert one but not the other).
_check_version_consistency(issues)
_section("Required Packages")
required_packages = [

View file

@ -396,6 +396,41 @@ def workspaces_root(board: Optional[str] = None) -> Path:
return board_dir(slug) / "workspaces"
def attachments_root(board: Optional[str] = None) -> Path:
    """Resolve the directory that holds task file attachments for a board.

    Like :func:`worker_logs_dir` / :func:`workspaces_root`, the location is
    anchored per board so attachments don't leak between projects; each task
    then gets its own ``<task_id>/`` subdirectory beneath it.

    Resolution order:

    1. ``HERMES_KANBAN_ATTACHMENTS_ROOT`` — when set to a non-blank value it
       pins the path directly (whitespace-stripped, ``~`` expanded). Intended
       for tests and unusual deployments.
    2. The ``default`` board uses ``<root>/kanban/attachments/``.
    3. Any other board uses ``<root>/kanban/boards/<slug>/attachments/``.

    When *board* does not normalize to a slug, the current board is used.

    Workers (which run with full file-tool access) read attached files by the
    absolute path surfaced in :func:`build_worker_context`. On the local
    terminal backend — the default for kanban — that path resolves directly.
    Remote backends (Docker/Modal) need this directory mounted; see the
    kanban docs.
    """
    pinned = os.environ.get("HERMES_KANBAN_ATTACHMENTS_ROOT", "").strip()
    if pinned:
        return Path(pinned).expanduser()

    requested = _normalize_board_slug(board)
    slug = get_current_board() if requested is None else requested
    if slug != DEFAULT_BOARD:
        return board_dir(slug) / "attachments"
    return kanban_home() / "kanban" / "attachments"
def task_attachments_dir(task_id: str, board: Optional[str] = None) -> Path:
    """Return one task's attachment directory: ``<attachments root>/<task_id>/``.

    The root is resolved by :func:`attachments_root` for *board*; nothing is
    created on disk here.
    """
    root = attachments_root(board=board)
    return root / task_id
def worker_logs_dir(board: Optional[str] = None) -> Path:
"""Return the directory under which per-task worker logs are written.
@ -831,6 +866,20 @@ class Comment:
created_at: int
@dataclass
class Attachment:
    """Typed, in-memory representation of one ``task_attachments`` row.

    Only metadata is carried here; the file content itself lives on disk at
    ``stored_path``.
    """

    # Integer primary key of the row.
    id: int
    # Id of the task the file is attached to.
    task_id: str
    # Display name of the file as recorded at upload time.
    filename: str
    # On-disk location of the blob, as recorded by the uploader.
    stored_path: str
    # Content type recorded for the file; None when not recorded.
    content_type: Optional[str]
    # Recorded file size (0 when unknown).
    size: int
    # Who uploaded the file; None when not recorded.
    uploaded_by: Optional[str]
    # Creation timestamp as an integer.
    created_at: int
@dataclass
class Event:
id: int
@ -957,6 +1006,23 @@ CREATE TABLE IF NOT EXISTS task_runs (
error TEXT
);
-- Files attached to a task (PDFs, images, source documents). The blob
-- lives on disk under ``attachments_root(board)/<task_id>/<stored_name>``;
-- this row carries metadata + the absolute ``stored_path`` so the
-- dashboard can list/download and ``build_worker_context`` can surface
-- the absolute path to the worker (which has full file-tool access). See
-- #35338.
CREATE TABLE IF NOT EXISTS task_attachments (
id INTEGER PRIMARY KEY AUTOINCREMENT,
task_id TEXT NOT NULL,
filename TEXT NOT NULL,
stored_path TEXT NOT NULL,
content_type TEXT,
size INTEGER NOT NULL DEFAULT 0,
uploaded_by TEXT,
created_at INTEGER NOT NULL
);
-- Subscription from a gateway source (platform + chat + thread) to a
-- task. The gateway's kanban-notifier watcher tails task_events and
-- pushes ``completed`` / ``blocked`` / ``spawn_auto_blocked`` events to
@ -981,6 +1047,7 @@ CREATE INDEX IF NOT EXISTS idx_comments_task ON task_comments(task_id, c
CREATE INDEX IF NOT EXISTS idx_events_task ON task_events(task_id, created_at);
CREATE INDEX IF NOT EXISTS idx_runs_task ON task_runs(task_id, started_at);
CREATE INDEX IF NOT EXISTS idx_runs_status ON task_runs(status);
CREATE INDEX IF NOT EXISTS idx_attachments_task ON task_attachments(task_id, created_at);
CREATE INDEX IF NOT EXISTS idx_notify_task ON kanban_notify_subs(task_id);
"""
@ -1637,6 +1704,140 @@ def _migrate_add_optional_columns(conn: sqlite3.Connection) -> None:
(new, old),
)
_rebuild_drifted_tables(conn)
# Legacy DBs defined these tables with a ``TEXT PRIMARY KEY`` id (or, for
# ``kanban_notify_subs``, a nullable ``TEXT last_event_id``). The current
# schema uses ``INTEGER PRIMARY KEY AUTOINCREMENT`` / ``INTEGER NOT NULL
# DEFAULT 0``. ``CREATE TABLE IF NOT EXISTS`` skips existing tables
# regardless of schema and ``_add_column_if_missing`` only adds columns, so
# neither can fix a drifted column type — the table must be rebuilt. See
# #35096.
#
# Each entry pairs the canonical CREATE TABLE with the CREATE INDEX
# statements that DROP TABLE would otherwise take down with it (including
# ``idx_events_run``, added by the additive pass above). To guard against
# this list drifting from SCHEMA_SQL, ``test_rebuilt_schema_matches_fresh``
# asserts a rebuilt legacy DB is byte-identical to a fresh one.
# Shape: ``{table_name: (create_table_sql, (create_index_sql, ...))}``.
# The CREATE TABLE text is the target schema for a rebuilt table; the index
# statements are re-run after the rebuild to restore that table's indexes.
_REBUILD_SPECS = {
"task_events": (
"CREATE TABLE task_events ("
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
" task_id TEXT NOT NULL, run_id INTEGER, kind TEXT NOT NULL,"
" payload TEXT, created_at INTEGER NOT NULL)",
(
"CREATE INDEX idx_events_task ON task_events(task_id, created_at)",
"CREATE INDEX idx_events_run ON task_events(run_id, id)",
),
),
"task_comments": (
"CREATE TABLE task_comments ("
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
" task_id TEXT NOT NULL, author TEXT NOT NULL, body TEXT NOT NULL,"
" created_at INTEGER NOT NULL)",
("CREATE INDEX idx_comments_task ON task_comments(task_id, created_at)",),
),
"task_runs": (
"CREATE TABLE task_runs ("
" id INTEGER PRIMARY KEY AUTOINCREMENT,"
" task_id TEXT NOT NULL, profile TEXT, step_key TEXT,"
" status TEXT NOT NULL, claim_lock TEXT, claim_expires INTEGER,"
" worker_pid INTEGER, max_runtime_seconds INTEGER,"
" last_heartbeat_at INTEGER, started_at INTEGER NOT NULL,"
" ended_at INTEGER, outcome TEXT, summary TEXT, metadata TEXT,"
" error TEXT)",
(
"CREATE INDEX idx_runs_task ON task_runs(task_id, started_at)",
"CREATE INDEX idx_runs_status ON task_runs(status)",
),
),
# Unlike the three tables above, this one has a composite primary key and
# no ``id`` column; its drift-prone column is ``last_event_id``.
"kanban_notify_subs": (
"CREATE TABLE kanban_notify_subs ("
" task_id TEXT NOT NULL, platform TEXT NOT NULL, chat_id TEXT NOT NULL,"
" thread_id TEXT NOT NULL DEFAULT '', user_id TEXT,"
" notifier_profile TEXT, created_at INTEGER NOT NULL,"
" last_event_id INTEGER NOT NULL DEFAULT 0,"
" PRIMARY KEY (task_id, platform, chat_id, thread_id))",
("CREATE INDEX idx_notify_task ON kanban_notify_subs(task_id)",),
),
}
def _table_has_drifted(conn: sqlite3.Connection, table: str) -> bool:
    """Tell whether *table* still has the legacy (pre-AUTOINCREMENT) shape.

    Args:
        conn: Connection whose rows support lookup by column name.
        table: Name of the table to inspect via ``PRAGMA table_info``.

    Returns:
        ``False`` when the table does not exist or lacks the column being
        checked. For ``kanban_notify_subs`` the result is ``True`` when
        ``last_event_id`` is declared with a type other than ``INTEGER``.
        For every other table it is ``True`` unless ``id`` is declared
        ``INTEGER`` and is part of the primary key.
    """
    columns = {
        col["name"]: col
        for col in conn.execute(f"PRAGMA table_info({table})").fetchall()
    }
    if not columns:
        return False  # table absent — nothing to rebuild

    if table == "kanban_notify_subs":
        cursor_col = columns.get("last_event_id")
        if cursor_col is None:
            return False
        return (cursor_col["type"] or "").upper() != "INTEGER"

    # task_events / task_comments / task_runs: id must be INTEGER and a PK.
    id_col = columns.get("id")
    if id_col is None:
        return False
    is_integer_pk = (id_col["type"] or "").upper() == "INTEGER" and bool(id_col["pk"])
    return not is_integer_pk
def _rebuild_drifted_tables(conn: sqlite3.Connection) -> None:
    """Rebuild every kanban table whose column types drifted from the current schema.

    Legacy boards break the gateway notifier (#35096): ids are not integers,
    so events never match the ``id > cursor`` filter and notifications are
    lost. Each drifted table listed in ``_REBUILD_SPECS`` is rebuilt in these
    steps: rename the old table aside → create the current table → copy the
    columns both versions share → drop the old table → recreate the indexes
    (dropping the old table takes its indexes with it).

    Data handling:

    * ``task_events`` / ``task_comments`` / ``task_runs``: the legacy ``id``
      is not copied; AUTOINCREMENT assigns fresh ids.
    * ``kanban_notify_subs``: ``last_event_id`` is cast to INTEGER, with NULL
      mapped to 0. Cursors therefore restart, so the first post-migration
      tick may replay a task's event history once.

    All rebuilds run inside one ``BEGIN IMMEDIATE`` transaction; on any error
    a ROLLBACK is attempted and the exception is re-raised. When nothing has
    drifted the function returns without opening a transaction, so it is
    safe to call repeatedly.
    """
    pending = [name for name in _REBUILD_SPECS if _table_has_drifted(conn, name)]
    if not pending:
        return

    conn.execute("BEGIN IMMEDIATE")
    try:
        for table in pending:
            create_sql, index_sqls = _REBUILD_SPECS[table]
            legacy = f"{table}_legacy"
            # Column list must be captured before the rename below.
            legacy_cols = [c["name"] for c in conn.execute(f"PRAGMA table_info({table})")]
            _log.info("kanban migration: rebuilding %s to match current schema", table)
            conn.execute(f"ALTER TABLE {table} RENAME TO {legacy}")
            conn.execute(create_sql)
            current_cols = {c["name"] for c in conn.execute(f"PRAGMA table_info({table})")}

            is_subs = table == "kanban_notify_subs"
            # The drifted column is handled specially (cast) or left out (id).
            special = "last_event_id" if is_subs else "id"
            carried = ", ".join(
                name for name in legacy_cols
                if name in current_cols and name != special
            )
            if is_subs:
                # Cast the legacy TEXT cursor to INTEGER; NULL / non-numeric → 0.
                copy_sql = (
                    f"INSERT INTO {table} ({carried}, last_event_id) "
                    f"SELECT {carried}, COALESCE(CAST(last_event_id AS INTEGER), 0) "
                    f"FROM {legacy}"
                )
            else:
                # Drop the legacy TEXT id; AUTOINCREMENT reassigns it.
                copy_sql = (
                    f"INSERT INTO {table} ({carried}) "
                    f"SELECT {carried} FROM {legacy}"
                )
            conn.execute(copy_sql)

            conn.execute(f"DROP TABLE {legacy}")
            for statement in index_sqls:
                conn.execute(statement)
        conn.execute("COMMIT")
    except Exception:
        try:
            conn.execute("ROLLBACK")
        except sqlite3.OperationalError:
            # Nothing to roll back (e.g. the transaction is already gone).
            pass
        raise
def _check_file_length_invariant(conn: sqlite3.Connection) -> None:
"""Read the SQLite header page_count and compare against actual file size.
@ -2252,6 +2453,121 @@ def list_comments(conn: sqlite3.Connection, task_id: str) -> list[Comment]:
]
# ---------------------------------------------------------------------------
# Attachments
# ---------------------------------------------------------------------------
def add_attachment(
    conn: sqlite3.Connection,
    task_id: str,
    *,
    filename: str,
    stored_path: str,
    content_type: Optional[str] = None,
    size: int = 0,
    uploaded_by: Optional[str] = None,
) -> int:
    """Persist the metadata row for a file attached to a task.

    Writing the blob to ``stored_path`` (under :func:`task_attachments_dir`)
    is the caller's job and must happen first; this function only inserts
    the ``task_attachments`` row and appends an ``attached`` event, both in
    one write transaction.

    Args:
        conn: Open kanban database connection.
        task_id: Id of an existing task.
        filename: Display name; stored whitespace-stripped.
        stored_path: Location of the already-written blob; stored as given.
        content_type: Optional content type.
        size: File size; coerced with ``int()``.
        uploaded_by: Optional uploader identity.

    Returns:
        The new attachment id (0 if the cursor reports no row id).

    Raises:
        ValueError: If *filename* or *stored_path* is empty/blank, or if no
            task with *task_id* exists.
    """
    if not (filename and filename.strip()):
        raise ValueError("attachment filename is required")
    if not (stored_path and stored_path.strip()):
        raise ValueError("attachment stored_path is required")

    created = int(time.time())
    with write_txn(conn):
        task_row = conn.execute(
            "SELECT 1 FROM tasks WHERE id = ?", (task_id,)
        ).fetchone()
        if not task_row:
            raise ValueError(f"unknown task {task_id}")

        clean_name = filename.strip()
        byte_count = int(size)
        cur = conn.execute(
            "INSERT INTO task_attachments "
            "(task_id, filename, stored_path, content_type, size, uploaded_by, created_at) "
            "VALUES (?, ?, ?, ?, ?, ?, ?)",
            (
                task_id,
                clean_name,
                stored_path,
                content_type,
                byte_count,
                uploaded_by,
                created,
            ),
        )
        event_payload = {"filename": clean_name, "size": byte_count, "by": uploaded_by}
        _append_event(conn, task_id, "attached", event_payload)
    return int(cur.lastrowid or 0)
def list_attachments(conn: sqlite3.Connection, task_id: str) -> list[Attachment]:
    """Return all attachments of *task_id*, oldest first.

    Rows are ordered by ``created_at`` then ``id``. A NULL ``size`` is
    reported as 0. An unknown task simply yields an empty list.
    """
    passthrough = (
        "id",
        "task_id",
        "filename",
        "stored_path",
        "content_type",
        "uploaded_by",
        "created_at",
    )
    found = conn.execute(
        "SELECT * FROM task_attachments WHERE task_id = ? ORDER BY created_at ASC, id ASC",
        (task_id,),
    ).fetchall()
    return [
        Attachment(size=row["size"] or 0, **{name: row[name] for name in passthrough})
        for row in found
    ]
def get_attachment(conn: sqlite3.Connection, attachment_id: int) -> Optional[Attachment]:
    """Look up a single attachment by its row id.

    Returns:
        The matching :class:`Attachment` (a NULL ``size`` is reported as 0),
        or ``None`` when no row has that id.
    """
    row = conn.execute(
        "SELECT * FROM task_attachments WHERE id = ?", (attachment_id,)
    ).fetchone()
    if row is not None:
        return Attachment(
            id=row["id"],
            task_id=row["task_id"],
            filename=row["filename"],
            stored_path=row["stored_path"],
            content_type=row["content_type"],
            size=row["size"] or 0,
            uploaded_by=row["uploaded_by"],
            created_at=row["created_at"],
        )
    return None
def delete_attachment(conn: sqlite3.Connection, attachment_id: int) -> Optional[Attachment]:
    """Remove an attachment's metadata row and, best-effort, its file on disk.

    The row deletion and the ``attachment_removed`` event are written in one
    transaction. The file at ``stored_path`` is then unlinked if it is a
    regular file; a missing file or any ``OSError`` is ignored, because the
    metadata row is the source of truth for whether an attachment exists.

    Returns:
        The removed :class:`Attachment`, or ``None`` when no row matched
        *attachment_id* (in which case nothing is changed).
    """
    with write_txn(conn):
        removed = get_attachment(conn, attachment_id)
        if removed is None:
            return None
        conn.execute("DELETE FROM task_attachments WHERE id = ?", (attachment_id,))
        _append_event(
            conn,
            removed.task_id,
            "attachment_removed",
            {"filename": removed.filename},
        )

    blob = Path(removed.stored_path)
    try:
        if blob.is_file():
            blob.unlink()
    except OSError:
        # Best-effort cleanup: a leftover blob is harmless without its row.
        pass
    return removed
def list_events(conn: sqlite3.Connection, task_id: str) -> list[Event]:
rows = conn.execute(
"SELECT * FROM task_events WHERE task_id = ? ORDER BY created_at ASC, id ASC",
@ -2457,7 +2773,9 @@ def _has_sticky_block(conn: sqlite3.Connection, task_id: str) -> bool:
return bool(row) and row["kind"] == "blocked"
def recompute_ready(conn: sqlite3.Connection) -> int:
def recompute_ready(
conn: sqlite3.Connection, failure_limit: int = None,
) -> int:
"""Promote ``todo`` tasks to ``ready`` when all parents are ``done`` or ``archived``.
Returns the number of tasks promoted. Safe to call inside or outside
@ -2465,17 +2783,34 @@ def recompute_ready(conn: sqlite3.Connection) -> int:
``blocked`` tasks are also considered for promotion (so a task
blocked purely by a parent dependency unblocks itself when the
parent completes), *except* when the most recent block event was a
worker-initiated ``kanban_block`` those stay blocked until an
explicit ``kanban_unblock`` (#28712). Without that guard, a
``review-required`` handoff would auto-respawn, the fresh worker
would find nothing to do, exit cleanly, get recorded as a protocol
violation, and the cycle would repeat indefinitely.
parent completes), *except* in two cases:
1. The most recent block event was a worker-initiated
``kanban_block`` those stay blocked until an explicit
``kanban_unblock`` (#28712).
2. The task's ``consecutive_failures`` has reached the effective
failure limit. This prevents infinite retry loops when a task
repeatedly exhausts its iteration budget: without this guard the
counter would reset on every recovery cycle and the circuit
breaker could never trip (#35072).
The effective failure limit resolves in the same order as the
circuit breaker in ``_record_task_failure`` so the two never
disagree about when a task is permanently blocked:
1. per-task ``max_retries`` if set
2. caller-supplied ``failure_limit`` (the dispatcher passes the
``kanban.failure_limit`` config value through ``dispatch_once``)
3. ``DEFAULT_FAILURE_LIMIT``
"""
if failure_limit is None:
failure_limit = DEFAULT_FAILURE_LIMIT
promoted = 0
with write_txn(conn):
todo_rows = conn.execute(
"SELECT id, status FROM tasks WHERE status IN ('todo', 'blocked')"
"SELECT id, status, consecutive_failures, max_retries "
"FROM tasks WHERE status IN ('todo', 'blocked')"
).fetchall()
for row in todo_rows:
task_id = row["id"]
@ -2493,13 +2828,25 @@ def recompute_ready(conn: sqlite3.Connection) -> int:
(task_id,),
).fetchall()
if all(p["status"] in ("done", "archived") for p in parents):
# Blocked tasks also get their failure counters reset —
# this is effectively an auto-unblock (circuit-breaker
# recovery; worker-initiated blocks are skipped above).
if cur_status == "blocked":
# Don't auto-recover tasks that have hit the
# circuit-breaker failure limit. Without this
# guard, a task that repeatedly exhausts its
# iteration budget would cycle forever:
# block → auto-recover → respawn → budget
# exhausted → block → … The counter must also
# be preserved so the breaker can accumulate
# across recovery cycles.
failures = int(row["consecutive_failures"] or 0)
task_limit = row["max_retries"]
effective_limit = (
int(task_limit) if task_limit is not None
else int(failure_limit)
)
if failures >= effective_limit:
continue
conn.execute(
"UPDATE tasks SET status = 'ready', "
"consecutive_failures = 0, last_failure_error = NULL "
"UPDATE tasks SET status = 'ready' "
"WHERE id = ? AND status = 'blocked'",
(task_id,),
)
@ -5424,7 +5771,7 @@ def dispatch_once(
if _crash_auto_blocked:
result.auto_blocked.extend(_crash_auto_blocked)
result.timed_out = enforce_max_runtime(conn)
result.promoted = recompute_ready(conn)
result.promoted = recompute_ready(conn, failure_limit=failure_limit)
# Count tasks already running so max_spawn enforces concurrency rather
# than a per-tick spawn budget. See the docstring above for the full
@ -6300,6 +6647,25 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
lines.append(_cap(task.body, _CTX_MAX_BODY_BYTES))
lines.append("")
# Attachments — files uploaded to this task (PDFs, source docs,
# images). Surface the absolute on-disk path so the worker, which has
# full file-tool access, can read them directly (read_file, terminal
# `pdftotext`, etc.). On the local terminal backend the path resolves
# as-is; remote backends need the kanban attachments dir mounted.
attachments = list_attachments(conn, task_id)
if attachments:
lines.append("## Attachments")
lines.append(
"Files attached to this task. Read them with the file/terminal "
"tools at the absolute paths below:"
)
for att in attachments:
size_kb = max(1, (att.size + 1023) // 1024) if att.size else 0
size_str = f", {size_kb} KB" if size_kb else ""
ctype = f", {att.content_type}" if att.content_type else ""
lines.append(f"- `{att.filename}`{ctype}{size_str} → `{att.stored_path}`")
lines.append("")
# Prior attempts — show closed runs so a retrying worker sees the
# history. Skip the currently-active run (that's this worker).
# Cap at _CTX_MAX_PRIOR_ATTEMPTS most-recent closed runs; older

View file

@ -65,6 +65,46 @@ import os
import sys
def _set_process_title() -> None:
    """Set the process title to 'hermes' so tools like 'ps', 'top', and
    'htop' show the app name instead of 'python3.xx'.

    Purely cosmetic — non-fatal on any platform. Every failure (optional
    dependency missing or unusable, ``ctypes`` not importable, libc lookup
    failing) is swallowed and the title is simply left unchanged.

    Strategy (try in order):

    1. ``setproctitle`` (opt-in dep installed via ``hermes tools`` or
       ``pip install setproctitle``, or bundled in a future release).
    2. ctypes ``prctl(PR_SET_NAME)`` (Linux only, 15-char limit).
    3. ctypes ``pthread_setname_np`` (macOS only, kernel thread name —
       changes lldb/top but not ``ps aux``).
    4. No-op on Windows (the .exe name is already ``hermes.exe``).
    """
    # Strategy 1: setproctitle (best — works on macOS, Linux, BSD)
    try:
        import setproctitle  # type: ignore[import-untyped]

        setproctitle.setproctitle("hermes")
        return
    except Exception:
        # ImportError when the optional dep is absent; any other error means
        # the installed module is unusable. Either way this must not abort
        # startup, so fall through to the ctypes fallback.
        pass

    # Strategy 2/3: platform-specific ctypes fallback. The imports live
    # inside the guard because ``ctypes`` itself can be missing on minimal
    # Python builds.
    try:
        import ctypes
        import platform

        system = platform.system()
        if system == "Linux":
            libc = ctypes.CDLL("libc.so.6", use_errno=True)
            libc.prctl(15, b"hermes", 0, 0, 0)  # PR_SET_NAME = 15
        elif system == "Darwin":
            libc = ctypes.CDLL("libc.dylib", use_errno=True)
            libc.pthread_setname_np(b"hermes")
        # Windows: the .exe name is already ``hermes.exe`` — nothing to do.
    except Exception:
        pass
# Mouse-tracking residue suppression — runs BEFORE every other import on the
# TUI hot path so the terminal stops emitting SGR/X10 mouse reports while the
# Python launcher is still doing imports (≈100–300ms in cooked + echo mode,
@ -2385,7 +2425,12 @@ def select_provider_and_model(args=None):
if active == "openrouter" and get_env_value("OPENAI_BASE_URL"):
active = "custom"
from hermes_cli.models import CANONICAL_PROVIDERS, _PROVIDER_LABELS
from hermes_cli.models import (
CANONICAL_PROVIDERS,
_PROVIDER_LABELS,
group_providers,
provider_group_for_slug,
)
provider_labels = dict(_PROVIDER_LABELS) # derive from canonical list
if active and active in _custom_provider_map:
@ -2398,8 +2443,43 @@ def select_provider_and_model(args=None):
print(f" Active provider: {active_label}")
print()
# Step 1: Provider selection — flat list from CANONICAL_PROVIDERS
all_providers = [(p.slug, p.tui_desc) for p in CANONICAL_PROVIDERS]
# Step 1: Provider selection.
#
# Canonical providers are folded into top-level groups (display only — see
# PROVIDER_GROUPS in hermes_cli/models.py). A multi-member group shows one
# row ("Kimi / Moonshot ▸"); picking it opens a member sub-picker that
# resolves back to a concrete slug, so the dispatch chain below is
# unchanged. Custom providers and the trailing actions stay flat.
canonical_descs = {p.slug: p.tui_desc for p in CANONICAL_PROVIDERS}
grouped_rows = group_providers([p.slug for p in CANONICAL_PROVIDERS])
# The group/slug that should be pre-selected: the active provider's group
# if it's grouped, otherwise the active slug itself.
active_group = provider_group_for_slug(active) if active else ""
# ordered entries: (key, label, members)
# members == [] → leaf row, key is a provider slug / action
# members != [] → group row, key is "group:<gid>"
ordered: list[tuple[str, str, list[str]]] = []
default_idx = 0
for row in grouped_rows:
if row["kind"] == "group":
gid = row["group_id"]
label = f"{row['label']}"
key = f"group:{gid}"
is_active = bool(active_group) and gid == active_group
members = row["members"]
else:
slug = row["slug"]
label = canonical_descs.get(slug, provider_labels.get(slug, slug))
key = slug
is_active = bool(active) and slug == active
members = []
if is_active:
ordered.append((key, f"{label} ← currently active", members))
default_idx = len(ordered) - 1
else:
ordered.append((key, label, members))
for key, provider_info in _custom_provider_map.items():
name = provider_info["name"]
@ -2407,36 +2487,49 @@ def select_provider_and_model(args=None):
short_url = base_url.replace("https://", "").replace("http://", "").rstrip("/")
saved_model = provider_info.get("model", "")
model_hint = f"{saved_model}" if saved_model else ""
all_providers.append((key, f"{name} ({short_url}){model_hint}"))
# Build the menu
ordered = []
default_idx = 0
for key, label in all_providers:
label = f"{name} ({short_url}){model_hint}"
if active and key == active:
ordered.append((key, f"{label} ← currently active"))
ordered.append((key, f"{label} ← currently active", []))
default_idx = len(ordered) - 1
else:
ordered.append((key, label))
ordered.append((key, label, []))
ordered.append(("custom", "Custom endpoint (enter URL manually)"))
ordered.append(("custom", "Custom endpoint (enter URL manually)", []))
_has_saved_custom_list = isinstance(config.get("custom_providers"), list) and bool(
config.get("custom_providers")
)
if _has_saved_custom_list:
ordered.append(("remove-custom", "Remove a saved custom provider"))
ordered.append(("aux-config", "Configure auxiliary models..."))
ordered.append(("cancel", "Leave unchanged"))
ordered.append(("remove-custom", "Remove a saved custom provider", []))
ordered.append(("aux-config", "Configure auxiliary models...", []))
ordered.append(("cancel", "Leave unchanged", []))
provider_idx = _prompt_provider_choice(
[label for _, label in ordered],
[label for _, label, _ in ordered],
default=default_idx,
)
if provider_idx is None or ordered[provider_idx][0] == "cancel":
print("No change.")
return
selected_provider = ordered[provider_idx][0]
selected_key = ordered[provider_idx][0]
selected_members = ordered[provider_idx][2]
# Group row → drill into a member sub-picker. Default to the active member
# if the active provider lives in this group.
if selected_members:
member_default = 0
if active in selected_members:
member_default = selected_members.index(active)
member_labels = [
canonical_descs.get(m, provider_labels.get(m, m)) for m in selected_members
]
member_idx = _prompt_provider_choice(member_labels, default=member_default)
if member_idx is None:
print("No change.")
return
selected_provider = selected_members[member_idx]
else:
selected_provider = selected_key
if selected_provider == "aux-config":
_aux_config_menu()
@ -8008,39 +8101,6 @@ def _detect_concurrent_hermes_instances(
except Exception:
return []
# Build a set of PIDs to exclude: the Python process itself plus its
# entire parent chain. On Windows the setuptools-generated hermes.exe
# launcher is a separate native process that spawns python.exe (the
# interpreter that runs our code). os.getpid() returns the Python PID,
# but the launcher (which holds the file lock) is the parent. Without
# walking the parent chain, every ``hermes update`` reports its own
# launcher as a concurrent instance — a false positive.
if exclude_pid is not None:
exclude_pids: set[int] = {exclude_pid}
else:
exclude_pids = {os.getpid()}
# The parent-walk is best-effort: if psutil rejects a PID (NoSuchProcess /
# AccessDenied) we stop walking and use whatever we've collected so far.
# Broader Exception catch on the outer block guards against partially-
# stubbed psutil in unit tests (e.g. a SimpleNamespace lacking Process /
# NoSuchProcess) — the surrounding update flow documents this helper as
# "never raises".
try:
current = psutil.Process(next(iter(exclude_pids)))
while True:
try:
parent = current.parent()
except Exception:
break
if parent is None or parent.pid <= 0:
break
if parent.pid in exclude_pids:
break # loop detected
exclude_pids.add(parent.pid)
current = parent
except Exception:
pass
# Resolve every shim path to its canonical form once for cheap comparison.
shim_paths: set[str] = set()
for shim in _hermes_exe_shims(scripts_dir):
@ -8051,6 +8111,56 @@ def _detect_concurrent_hermes_instances(
if not shim_paths:
return []
# Build a set of PIDs to exclude: the Python process itself plus every
# ancestor whose executable is one of our shims. On Windows the
# setuptools-generated hermes.exe launcher is a separate native process
# that spawns python.exe (the interpreter that runs our code).
# os.getpid() returns the Python PID, but the launcher (which holds the
# file lock) is the parent. Without excluding it, every ``hermes update``
# reports its own launcher as a concurrent instance — a false positive
# (issues #29341, #34795).
#
# Two robustness points learned from the field:
# 1. Use ``proc.parents()`` — it returns the WHOLE ancestor list in one
# call. The earlier per-hop ``current.parent()`` loop bailed on the
# first psutil error (AccessDenied/NoSuchProcess is common on Windows
# across session/elevation boundaries), leaving the launcher shim in
# the candidate set and re-triggering the false positive.
# 2. Only exclude ancestors whose exe is itself a shim. A genuine second
# hermes.exe sitting *under* a non-Hermes parent (e.g. a Hermes
# Desktop backend child) must still be flagged, so we don't blanket-
# exclude unrelated ancestors like the shell or terminal.
# Broad ``except Exception`` guards against partially-stubbed psutil in
# unit tests; this helper is documented as "never raises".
if exclude_pid is not None:
exclude_pids: set[int] = {int(exclude_pid)}
else:
exclude_pids = {os.getpid()}
try:
seed = next(iter(exclude_pids))
try:
ancestors = psutil.Process(seed).parents()
except Exception:
ancestors = []
for ancestor in ancestors:
try:
anc_exe = ancestor.exe()
except Exception:
continue
if not anc_exe:
continue
try:
anc_norm = str(Path(anc_exe).resolve()).lower()
except (OSError, ValueError):
anc_norm = str(anc_exe).lower()
if anc_norm in shim_paths:
try:
exclude_pids.add(int(ancestor.pid))
except Exception:
continue
except Exception:
pass
matches: list[tuple[int, str]] = []
try:
proc_iter = psutil.process_iter(["pid", "exe", "name"])
@ -8091,6 +8201,13 @@ def _format_concurrent_instances_message(
lines.append("")
lines.append(" Close Hermes Desktop, exit any open `hermes` REPLs, and")
lines.append(" stop the gateway (`hermes gateway stop`) before retrying.")
lines.append("")
if matches:
pid_args = " ".join(f"/PID {pid}" for pid, _ in matches)
lines.append(" If you've already closed everything and these PIDs are")
lines.append(" stale, terminate them directly, then retry the update:")
lines.append(f" taskkill {pid_args} /F")
lines.append("")
lines.append(" Override with `hermes update --force` if you've already")
lines.append(" confirmed those processes will not write to the venv.")
return "\n".join(lines)
@ -9055,18 +9172,51 @@ def cmd_update(args):
def _cmd_update_pip(args):
"""Update Hermes via pip (for PyPI installs)."""
from hermes_cli import __version__
from hermes_cli.config import is_uv_tool_install
print(f"→ Current version: {__version__}")
print("→ Checking PyPI for updates...")
uv = shutil.which("uv")
if uv:
in_venv = sys.prefix != sys.base_prefix
# pipx-managed installs live under .../pipx/venvs/<name>/...
pipx_managed = "pipx" in sys.prefix.split(os.sep)
pipx = shutil.which("pipx") if pipx_managed else None
# Only the ``uv pip install`` path inside a venv needs VIRTUAL_ENV
# exported (uv refuses to install without it when the launcher shim
# didn't activate the venv). ``uv tool upgrade`` / ``pipx upgrade``
# operate on a named environment and ignore VIRTUAL_ENV, so we don't
# set it for them.
export_virtualenv = False
if is_uv_tool_install():
if not uv:
print("✗ Detected a uv-tool install but `uv` is not on PATH; install uv and retry.")
sys.exit(1)
cmd = [uv, "tool", "upgrade", "hermes-agent"]
elif pipx_managed and pipx:
# pipx owns its own venv; ``pipx upgrade`` is the only correct path.
# Matches scripts/auto-update.sh, which already uses pipx upgrade.
cmd = [pipx, "upgrade", "hermes-agent"]
elif uv:
cmd = [uv, "pip", "install", "--upgrade", "hermes-agent"]
if in_venv:
# Launcher shim runs the venv interpreter but doesn't export
# VIRTUAL_ENV; without it uv errors "No virtual environment found".
export_virtualenv = True
else:
# Outside any venv, ``--system`` lets uv target the active
# interpreter, matching pip's default behaviour.
cmd.insert(3, "--system")
else:
cmd = [sys.executable, "-m", "pip", "install", "--upgrade", "hermes-agent"]
print(f"→ Running: {' '.join(cmd)}")
result = subprocess.run(cmd)
run_kwargs = {}
if export_virtualenv:
run_kwargs["env"] = {**os.environ, "VIRTUAL_ENV": sys.prefix}
result = subprocess.run(cmd, **run_kwargs)
if result.returncode != 0:
print("✗ Update failed")
sys.exit(1)
@ -11157,6 +11307,13 @@ def cmd_completion(args, parser=None):
print(generate_bash(parser))
def cmd_prompt_size(args):
    """Handle ``hermes prompt-size``: print the prompt / tool-schema size report.

    Thin dispatcher: the implementation lives in ``hermes_cli.prompt_size``
    and is imported only when this subcommand actually runs. The parsed
    ``args`` namespace is forwarded unchanged; nothing is returned.
    """
    from hermes_cli.prompt_size import cmd_prompt_size as _show_prompt_size

    _show_prompt_size(args)
def cmd_logs(args):
"""View and filter Hermes log files."""
from hermes_cli.logs import tail_log, list_logs
@ -11193,6 +11350,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
"dump", "fallback", "gateway", "hooks", "import", "insights",
"gui", "desktop", "kanban", "login", "logout", "logs", "lsp", "mcp", "memory", "migrate",
"model", "pairing", "plugins", "portal", "postinstall", "profile", "proxy",
"prompt-size",
"send", "sessions", "setup",
"skills", "slack", "status", "tools", "uninstall", "update",
"version", "webhook", "whatsapp", "chat", "secrets", "security",
@ -11293,6 +11451,26 @@ _AGENT_SUBCOMMANDS = {
}
def _is_tui_chat_launch(args) -> bool:
    """Return True when this invocation is a TUI launch.

    That is the case when ``args.tui`` is present and truthy, or when the
    ``HERMES_TUI`` environment variable is exactly ``"1"``.
    """
    if getattr(args, "tui", False):
        return True
    return os.environ.get("HERMES_TUI") == "1"
def _command_has_dedicated_mcp_startup(args) -> bool:
    """Return True for the command/subcommand pairs singled out below.

    Matches ``acp``, ``gateway run``, and ``cron run`` / ``cron tick``.
    Every other command (or subcommand) yields False.
    """
    command = args.command
    if command == "acp":
        return True
    if command == "gateway":
        return getattr(args, "gateway_command", None) == "run"
    if command == "cron":
        return getattr(args, "cron_command", None) in {"run", "tick"}
    return False
def _should_background_mcp_startup(args) -> bool:
    """Return True when the command is ``None``, ``chat`` or ``rl`` and this
    is not a TUI launch.

    The TUI check runs first, so ``args.command`` is only read for non-TUI
    invocations.
    """
    return not _is_tui_chat_launch(args) and args.command in {None, "chat", "rl"}
def _prepare_agent_startup(args) -> None:
"""Discover plugins/MCP/hooks for commands that can run an agent turn."""
_sub_attr, _sub_set = _AGENT_SUBCOMMANDS.get(args.command, (None, None))
@ -11312,19 +11490,42 @@ def _prepare_agent_startup(args) -> None:
"plugin discovery failed at CLI startup",
exc_info=True,
)
try:
# MCP tool discovery — no event loop running in CLI/TUI startup,
# so inline is safe. Moved here from model_tools.py module scope
# to avoid freezing the gateway's event loop on its first message
# via the same lazy import path (#16856).
from tools.mcp_tool import discover_mcp_tools
_run_inline_mcp_discovery = True
if _is_tui_chat_launch(args):
# The TUI launcher hands off to a dedicated startup path that already
# backgrounds MCP discovery with a bounded join before the first tool
# snapshot.
_run_inline_mcp_discovery = False
elif _command_has_dedicated_mcp_startup(args):
# These entrypoints already do their own MCP startup later on the real
# runtime path (gateway executor, ACP launcher, cron job runner).
_run_inline_mcp_discovery = False
elif _should_background_mcp_startup(args):
try:
from hermes_cli.mcp_startup import start_background_mcp_discovery
discover_mcp_tools()
except Exception:
logger.debug(
"MCP tool discovery failed at CLI startup",
exc_info=True,
)
start_background_mcp_discovery(
logger=logger,
thread_name="cli-mcp-discovery",
)
except Exception:
logger.debug(
"Background MCP tool discovery failed at CLI startup",
exc_info=True,
)
_run_inline_mcp_discovery = False
if _run_inline_mcp_discovery:
try:
# MCP tool discovery remains synchronous for entrypoints that do
# not own a later bounded/executor startup path.
from tools.mcp_tool import discover_mcp_tools
discover_mcp_tools()
except Exception:
logger.debug(
"MCP tool discovery failed at CLI startup",
exc_info=True,
)
try:
from hermes_cli.config import load_config
from agent.shell_hooks import register_from_config
@ -11465,6 +11666,10 @@ def _try_termux_fast_tui_launch() -> bool:
def main():
"""Main entry point for hermes CLI."""
# Cosmetic: make the process show up as 'hermes' instead of 'python3.11'
# in ps/top/htop. Non-fatal — just a nicer UX.
_set_process_title()
# Force UTF-8 stdio on Windows before anything prints. No-op elsewhere.
try:
from hermes_cli.stdio import configure_windows_stdio
@ -13218,9 +13423,15 @@ Examples:
),
)
memory_sub = memory_parser.add_subparsers(dest="memory_command")
memory_sub.add_parser(
_setup_parser = memory_sub.add_parser(
"setup", help="Interactive provider selection and configuration"
)
_setup_parser.add_argument(
"provider",
nargs="?",
default=None,
help="Provider to configure directly (e.g. honcho), skipping the picker",
)
memory_sub.add_parser("status", help="Show current memory provider config")
memory_sub.add_parser("off", help="Disable external provider (built-in only)")
_reset_parser = memory_sub.add_parser(
@ -14471,6 +14682,30 @@ Examples:
)
logs_parser.set_defaults(func=cmd_logs)
# =========================================================================
# prompt-size command
# =========================================================================
prompt_size_parser = subparsers.add_parser(
"prompt-size",
help="Show a byte breakdown of the system prompt + tool schemas",
description=(
"Report the fixed prompt budget for a fresh session: system "
"prompt total, skills index, memory, user profile, and tool-schema "
"JSON. Runs offline (no API call)."
),
)
prompt_size_parser.add_argument(
"--platform",
default="cli",
help="Platform to simulate (cli, telegram, discord, ...). Default: cli",
)
prompt_size_parser.add_argument(
"--json",
action="store_true",
help="Emit the breakdown as JSON",
)
prompt_size_parser.set_defaults(func=cmd_prompt_size)
# =========================================================================
# Parse and execute
# =========================================================================

59
hermes_cli/mcp_startup.py Normal file
View file

@ -0,0 +1,59 @@
"""Shared CLI/TUI-safe helpers for background MCP discovery."""
from __future__ import annotations
import threading
from typing import Optional
_mcp_discovery_lock = threading.Lock()
_mcp_discovery_started = False
_mcp_discovery_thread: Optional[threading.Thread] = None
def _has_configured_mcp_servers() -> bool:
    """Cheap config probe so non-MCP users avoid importing the MCP stack.

    Returns True only when the raw config's ``mcp_servers`` entry is a
    non-empty dict. If the probe itself raises for any reason, returns True
    as well.
    """
    try:
        from hermes_cli.config import read_raw_config

        raw_config = read_raw_config() or {}
        servers = raw_config.get("mcp_servers")
        return isinstance(servers, dict) and bool(servers)
    except Exception:
        # Be conservative: if config probing fails, try discovery in the
        # background so startup still can't block.
        return True
def start_background_mcp_discovery(*, logger, thread_name: str) -> None:
    """Spawn one shared background MCP discovery thread for this process.

    Only the first call does any work; later calls see the started flag and
    return immediately. When the config probe reports no MCP servers, no
    thread is created.

    Args:
        logger: Logger used to record (at debug level) a discovery failure
            inside the worker thread.
        thread_name: Name given to the daemon thread.
    """
    global _mcp_discovery_started, _mcp_discovery_thread

    # Claim the one-shot flag under the lock; everything else runs outside it.
    with _mcp_discovery_lock:
        already_started = _mcp_discovery_started
        _mcp_discovery_started = True
    if already_started or not _has_configured_mcp_servers():
        return

    def _run_discovery() -> None:
        try:
            from tools.mcp_tool import discover_mcp_tools

            discover_mcp_tools()
        except Exception:
            logger.debug("Background MCP tool discovery failed", exc_info=True)

    worker = threading.Thread(target=_run_discovery, name=thread_name, daemon=True)
    # Publish the handle before starting so wait_for_mcp_discovery can join it.
    _mcp_discovery_thread = worker
    worker.start()
def wait_for_mcp_discovery(timeout: float = 0.75) -> None:
    """Briefly wait for background MCP discovery before the first tool snapshot.

    Joins the recorded discovery thread for at most ``timeout`` seconds.
    Returns immediately when no thread was recorded or it has already
    finished.
    """
    worker = _mcp_discovery_thread
    if worker is not None and worker.is_alive():
        worker.join(timeout=timeout)

View file

@ -452,7 +452,11 @@ def memory_command(args) -> None:
"""Route memory subcommands."""
sub = getattr(args, "memory_command", None)
if sub == "setup":
cmd_setup(args)
provider = getattr(args, "provider", None)
if provider:
cmd_setup_provider(provider)
else:
cmd_setup(args)
elif sub == "status":
cmd_status(args)
else:

View file

@ -936,6 +936,105 @@ _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
# ---------------------------------------------------------------------------
# Provider groups — DISPLAY ONLY
#
# Some vendors expose several Hermes provider slugs (one per endpoint /
# auth method: global API, China API, OAuth coding plan, ...). Listing every
# slug as a top-level row in the interactive `hermes model` / setup wizard /
# Telegram `/model` pickers makes that list long and noisy.
#
# These groups fold related slugs under one top-level row in INTERACTIVE
# PICKERS only. They do NOT change ``CANONICAL_PROVIDERS``, slug identity,
# the ``--provider`` flag, ``/model <provider:model>``, or any typed path —
# every member slug remains individually addressable. Grouping is a pure
# display affordance; ``group_providers()`` is the single fold used by all
# three picker surfaces so they stay consistent.
#
# group_id -> (display_label, [member_slug, ...])
#
# Member order is the order shown inside the group submenu.
# ---------------------------------------------------------------------------
PROVIDER_GROUPS: dict[str, tuple[str, list[str]]] = {
    "kimi": ("Kimi / Moonshot", ["kimi-coding", "kimi-coding-cn"]),
    "minimax": ("MiniMax", ["minimax", "minimax-oauth", "minimax-cn"]),
    "xai": ("xAI Grok", ["xai", "xai-oauth"]),
    "google": ("Google Gemini", ["gemini", "google-gemini-cli"]),
    "openai": ("OpenAI", ["openai-codex", "openai-api"]),
    "opencode": ("OpenCode", ["opencode-zen", "opencode-go"]),
    "copilot": ("GitHub Copilot", ["copilot", "copilot-acp"]),
}

# Reverse index: member slug -> group_id. Built once at import.
_SLUG_TO_GROUP: dict[str, str] = {
    slug: gid for gid, (_label, members) in PROVIDER_GROUPS.items() for slug in members
}


def provider_group_for_slug(slug: str) -> str:
    """Return the group_id a provider slug belongs to, or "" if ungrouped.

    The slug is stripped and lower-cased before lookup; ``None`` and other
    falsy values are treated as the empty slug.
    """
    return _SLUG_TO_GROUP.get(str(slug or "").strip().lower(), "")


def group_providers(slugs):
    """Fold a flat ordered slug iterable into picker rows by provider group.

    DISPLAY ONLY. Used by every interactive picker (``hermes model``, the
    setup wizard, the Telegram ``/model`` keyboard) so grouping is identical
    across surfaces.

    Args:
        slugs: Any iterable of provider slugs, including one-shot iterators
            and generators. Each slug is stripped and lower-cased before use.

    Returns:
        A list of row dicts, each one of::

            {"kind": "single", "slug": <slug>}               # ungrouped, or
                                                             # 1-member group
            {"kind": "group", "group_id": <gid>, "label": <label>,
             "members": [<slug>, ...]}                       # 2+ members

    Rules:
      * A group row appears at the position of its FIRST present member, in
        the input order. Subsequent members fold into that row (and are not
        emitted again).
      * Member order inside a group follows ``PROVIDER_GROUPS`` declaration,
        restricted to the members actually present in ``slugs``.
      * A group reduced to a single present member degrades to a ``single``
        row — no pointless one-item submenu.
      * Slugs not in any group pass through as ``single`` rows, order
        preserved.
      * Empty slugs and duplicates (after normalization) are ignored.
    """
    # Normalize and de-duplicate in a single pass. Materializing the input
    # here keeps generators usable, and makes the "is this member present?"
    # check below use the same normalized form as the row-building loop.
    ordered: list[str] = []
    present: set[str] = set()
    for raw in slugs:
        slug = str(raw or "").strip().lower()
        if slug and slug not in present:
            present.add(slug)
            ordered.append(slug)

    rows = []
    emitted_groups: set[str] = set()
    for slug in ordered:
        gid = _SLUG_TO_GROUP.get(slug, "")
        if not gid:
            rows.append({"kind": "single", "slug": slug})
            continue
        if gid in emitted_groups:
            continue  # already folded at the first member's position
        emitted_groups.add(gid)

        label, declared = PROVIDER_GROUPS[gid]
        # Declaration order, restricted to what the caller actually passed.
        members = [m for m in declared if m in present] or [slug]
        if len(members) == 1:
            rows.append({"kind": "single", "slug": members[0]})
        else:
            rows.append(
                {"kind": "group", "group_id": gid, "label": label, "members": members}
            )
    return rows
_PROVIDER_ALIASES = {
"glm": "zai",
"z-ai": "zai",

View file

@ -4,6 +4,7 @@ from __future__ import annotations
import hashlib
import json
import threading
import time
import urllib.request
from dataclasses import dataclass
@ -15,6 +16,7 @@ NousAccountInfoSource = Literal["jwt", "account_api", "inference_key", "none", "
_ACCOUNT_INFO_CACHE_TTL = 60
_account_info_cache: tuple[str, float, "NousPortalAccountInfo"] | None = None
_ACCOUNT_INFO_CACHE_LOCK = threading.Lock()
@dataclass(frozen=True)
@ -302,10 +304,11 @@ def _fresh_account_info(
portal_base_url = _portal_base_url(refreshed_state) or portal_base_url
cache_key = _cache_key(access_token, portal_base_url)
if not force_fresh and _account_info_cache is not None:
cached_key, cached_at, cached_info = _account_info_cache
if cached_key == cache_key and (time.monotonic() - cached_at) < _ACCOUNT_INFO_CACHE_TTL:
return cached_info
with _ACCOUNT_INFO_CACHE_LOCK:
if not force_fresh and _account_info_cache is not None:
cached_key, cached_at, cached_info = _account_info_cache
if cached_key == cache_key and (time.monotonic() - cached_at) < _ACCOUNT_INFO_CACHE_TTL:
return cached_info
payload = _fetch_nous_account_info(access_token, portal_base_url)
if not payload:
@ -327,7 +330,8 @@ def _fresh_account_info(
state=refreshed_state,
portal_base_url=portal_base_url,
)
_account_info_cache = (cache_key, time.monotonic(), info)
with _ACCOUNT_INFO_CACHE_LOCK:
_account_info_cache = (cache_key, time.monotonic(), info)
return info
except Exception as exc:
return _error_info(

View file

@ -587,9 +587,20 @@ def apply_nous_managed_defaults(
changed.add("browser")
if "image_gen" in selected_toolsets and not fal_key_is_configured():
image_cfg = config.get("image_gen")
if not isinstance(image_cfg, dict):
image_cfg = {}
config["image_gen"] = image_cfg
image_cfg["use_gateway"] = True
changed.add("image_gen")
if "video_gen" in selected_toolsets and not fal_key_is_configured():
video_cfg = config.get("video_gen")
if not isinstance(video_cfg, dict):
video_cfg = {}
config["video_gen"] = video_cfg
video_cfg["provider"] = "fal"
video_cfg["use_gateway"] = True
changed.add("video_gen")
return changed

View file

@ -174,28 +174,55 @@ def run_oneshot(
# Redirect stderr AND stdout to devnull for the entire call tree.
# We'll print the final response to the real stdout at the end.
real_stdout = sys.stdout
real_stderr = sys.stderr
devnull = open(os.devnull, "w", encoding="utf-8")
response: Optional[str] = None
failure: BaseException | None = None
try:
with redirect_stdout(devnull), redirect_stderr(devnull):
response = _run_agent(
prompt,
model=model,
provider=provider,
toolsets=explicit_toolsets,
use_config_toolsets=use_config_toolsets,
)
try:
response = _run_agent(
prompt,
model=model,
provider=provider,
toolsets=explicit_toolsets,
use_config_toolsets=use_config_toolsets,
)
except BaseException as exc: # noqa: BLE001
# Capture anything that escapes the agent (including OSError
# from prompt_toolkit/Vt100 when stdout is a non-TTY pipe,
# KeyboardInterrupt, SystemExit, etc.) so we can surface it on
# the real stderr instead of crashing past the redirect with a
# traceback that the caller never sees. A silent exit in a
# cron / SSH / subprocess context is the worst failure mode.
# See #30623.
failure = exc
finally:
try:
devnull.close()
except Exception:
pass
if response:
real_stdout.write(response)
if not response.endswith("\n"):
real_stdout.write("\n")
real_stdout.flush()
if failure is not None:
# Re-raise control-flow exceptions so the parent handles them as usual
# (Ctrl-C / explicit sys.exit() inside the agent).
if isinstance(failure, (KeyboardInterrupt, SystemExit)):
raise failure
real_stderr.write(f"hermes -z: agent failed: {failure}\n")
real_stderr.flush()
return 1
if not (response or "").strip():
real_stderr.write("hermes -z: no final response was produced; treating the run as failed.\n")
real_stderr.flush()
return 1
assert response is not None # narrowed by the empty-response guard above
real_stdout.write(response)
if not response.endswith("\n"):
real_stdout.write("\n")
real_stdout.flush()
return 0

View file

@ -1471,8 +1471,9 @@ def import_profile(archive_path: str, name: Optional[str] = None) -> Path:
def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) -> None:
"""Rename Honcho host blocks for a renamed profile without changing peers."""
old_host = f"hermes.{old_name}"
new_host = f"hermes.{new_name}"
old_host = f"hermes_{old_name}"
legacy_old_host = f"hermes.{old_name}"
new_host = f"hermes_{new_name}"
candidates = [
new_dir / "honcho.json",
@ -1496,18 +1497,24 @@ def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) ->
continue
hosts = raw.get("hosts")
if not isinstance(hosts, dict) or old_host not in hosts:
if not isinstance(hosts, dict):
continue
source_host = old_host if old_host in hosts else legacy_old_host
if source_host not in hosts:
continue
if new_host in hosts:
print(f"⚠ Honcho host block not migrated: {new_host} already exists in {path}")
continue
block = hosts[old_host]
block = hosts[source_host]
if isinstance(block, dict) and "aiPeer" not in block:
bare = old_host.split(".", 1)[1] if "." in old_host else old_host
if source_host.startswith("hermes_"):
bare = source_host.split("_", 1)[1]
else:
bare = source_host.split(".", 1)[1] if "." in source_host else source_host
block["aiPeer"] = bare
hosts[new_host] = hosts.pop(old_host)
hosts[new_host] = hosts.pop(source_host)
tmp = path.with_suffix(path.suffix + ".tmp")
try:
tmp.write_text(json.dumps(raw, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
@ -1519,7 +1526,7 @@ def _migrate_honcho_profile_host(old_name: str, new_name: str, new_dir: Path) ->
pass
continue
print(f"✓ Honcho host updated: {old_host}{new_host}")
print(f"✓ Honcho host updated: {source_host}{new_host}")
def rename_profile(old_name: str, new_name: str) -> Path:

153
hermes_cli/prompt_size.py Normal file
View file

@ -0,0 +1,153 @@
"""Prompt-size diagnostic: ``hermes prompt-size``.
Reports a byte/char breakdown of the system prompt the agent would build for
a fresh session — system prompt total, the ``<available_skills>`` index,
memory + user profile, and tool-schema JSON. Lets users see where their fixed
prompt budget goes (issue #34667) without parsing a saved session JSON by hand.
The diagnostic builds a real inspection agent (so the numbers match what
actually ships on the wire) but never makes a network call: it passes dummy
credentials so ``AIAgent.__init__`` takes the direct-construction path, then
calls ``build_system_prompt_parts`` / inspects ``agent.tools`` offline.
"""
from __future__ import annotations
import json
import re
from typing import Any, Dict, List, Tuple
# The skills index is wrapped in this tag pair inside the stable tier.
_SKILLS_BLOCK_RE = re.compile(r"<available_skills>.*?</available_skills>", re.DOTALL)
def _bytes(s: str) -> int:
    """Return the length of *s* in bytes once encoded as UTF-8."""
    encoded = s.encode("utf-8")
    return len(encoded)
def _build_inspection_agent(platform: str) -> Any:
    """Construct an ``AIAgent`` used only for prompt inspection.

    The model name is read from the ``model`` section of the loaded config
    (``default`` first, then ``model``, else an empty string). The agent is
    built with a placeholder ``api_key`` and a fixed ``base_url``, in quiet
    mode and with trajectory saving disabled. Only ``platform`` is forwarded
    from the caller.

    NOTE(review): the module docstring states that the placeholder
    credentials make ``AIAgent.__init__`` take its direct-construction path
    with no network access; that behavior lives in ``run_agent`` and is not
    visible here.
    """
    from run_agent import AIAgent
    from hermes_cli.config import load_config

    model_section = load_config().get("model")
    if not isinstance(model_section, dict):
        # A missing or non-mapping ``model`` entry means "no configured model".
        model_section = {}
    model_name = model_section.get("default") or model_section.get("model") or ""

    agent_kwargs = {
        "model": model_name,
        "api_key": "inspect-only",
        "base_url": "https://openrouter.ai/api/v1",
        "quiet_mode": True,
        "save_trajectories": False,
        "platform": platform,
    }
    return AIAgent(**agent_kwargs)
def compute_prompt_breakdown(platform: str = "cli") -> Dict[str, Any]:
    """Measure the fixed prompt payload of a fresh session.

    Args:
        platform: Platform identifier used to build the inspection agent.

    Returns:
        A dict with the keys ``platform``, ``model``, ``system_prompt``,
        ``skills_index``, ``memory``, ``user_profile`` (each of the last four
        a ``{"chars", "bytes"}`` dict), ``tools`` (``count`` and
        ``json_bytes``) and ``sections`` (a list of ``(label, chars, bytes)``
        tuples, one per prompt tier).
    """
    from agent.system_prompt import build_system_prompt, build_system_prompt_parts

    def _measure(text: str) -> Dict[str, int]:
        # Character count and UTF-8 byte count of one block.
        return {"chars": len(text), "bytes": _bytes(text)}

    agent = _build_inspection_agent(platform)
    parts = build_system_prompt_parts(agent)
    full = build_system_prompt(agent)

    stable, context, volatile = (
        parts.get(tier, "") for tier in ("stable", "context", "volatile")
    )

    # The skills index is the <available_skills> block, looked up in the
    # stable tier only.
    found = _SKILLS_BLOCK_RE.search(stable)
    skills_index = "" if found is None else found.group(0)

    # Memory and user profile are re-rendered straight from the memory store
    # so each can be sized on its own rather than as part of ``volatile``.
    memory_block = user_block = ""
    store = getattr(agent, "_memory_store", None)
    if store is not None:
        try:
            if getattr(agent, "_memory_enabled", True):
                memory_block = store.format_for_system_prompt("memory") or ""
            if getattr(agent, "_user_profile_enabled", True):
                user_block = store.format_for_system_prompt("user") or ""
        except Exception:
            # Best effort: a failing store leaves the affected sizes at zero.
            pass

    # Tool schemas are measured as the JSON text of ``agent.tools``.
    tools = getattr(agent, "tools", None) or []
    tools_json = json.dumps(tools, ensure_ascii=False)

    tiers = (
        ("stable (identity/guidance/skills)", stable),
        ("context (AGENTS.md/cwd files)", context),
        ("volatile (memory/profile/timestamp)", volatile),
    )
    sections: List[Tuple[str, int, int]] = [
        (label, len(text), _bytes(text)) for label, text in tiers
    ]

    return {
        "platform": platform,
        "model": getattr(agent, "model", "") or "",
        "system_prompt": _measure(full),
        "skills_index": _measure(skills_index),
        "memory": _measure(memory_block),
        "user_profile": _measure(user_block),
        "tools": {"count": len(tools), "json_bytes": _bytes(tools_json)},
        "sections": sections,
    }
def _fmt_kb(n: int) -> str:
    """Format a byte count as kibibytes with one decimal, e.g. ``1.5 KB``."""
    kib = n / 1024
    return format(kib, ".1f") + " KB"
def render_breakdown(data: Dict[str, Any]) -> str:
    """Format a ``compute_prompt_breakdown`` result as plain terminal text.

    Args:
        data: Mapping with the keys ``platform``, ``model``,
            ``system_prompt``, ``skills_index``, ``memory``,
            ``user_profile``, ``sections`` and ``tools``.

    Returns:
        The report as a single newline-joined string (no trailing newline).
    """
    total = data["system_prompt"]
    out: List[str] = [
        f"Prompt-size breakdown (platform={data['platform']}, model={data['model'] or 'unset'})",
        "",
        f" System prompt total : {total['bytes']:>8,} B ({_fmt_kb(total['bytes'])}, {total['chars']:,} chars)",
        "",
        " Major blocks:",
    ]

    skills, memory, profile = (
        data[key] for key in ("skills_index", "memory", "user_profile")
    )
    out += [
        f" skills index : {skills['bytes']:>8,} B ({_fmt_kb(skills['bytes'])})",
        f" memory : {memory['bytes']:>8,} B ({_fmt_kb(memory['bytes'])})",
        f" user profile : {profile['bytes']:>8,} B ({_fmt_kb(profile['bytes'])})",
        "",
        " Prompt tiers:",
    ]

    # Each section is (label, chars, bytes); only the byte size is printed.
    out.extend(
        f" {label:<36}: {size:>8,} B ({_fmt_kb(size)})"
        for label, _chars, size in data["sections"]
    )
    out.append("")

    tool_info = data["tools"]
    out.append(
        f" Tool schemas : {tool_info['json_bytes']:>8,} B ({_fmt_kb(tool_info['json_bytes'])}, {tool_info['count']} tools)"
    )
    return "\n".join(out)
def cmd_prompt_size(args: Any) -> None:
    """Entry point for ``hermes prompt-size``.

    Reads ``platform`` (default ``"cli"``) and ``json`` (default ``False``)
    from *args*, computes the breakdown and prints it either as indented
    JSON or as the plain-text report.

    If the breakdown cannot be computed, a one-line message is written to
    stderr and the function returns normally. Keeping the message off stdout
    means a ``--json`` consumer never receives a non-JSON line.

    Args:
        args: Parsed CLI namespace; missing attributes fall back to defaults.
    """
    import sys

    platform = getattr(args, "platform", "cli") or "cli"
    as_json = getattr(args, "json", False)

    try:
        data = compute_prompt_breakdown(platform)
    except Exception as e:
        # Diagnostics belong on stderr; stdout is reserved for the report.
        print(f"Could not compute prompt-size breakdown: {e}", file=sys.stderr)
        return

    if as_json:
        print(json.dumps(data, ensure_ascii=False, indent=2))
    else:
        print(render_breakdown(data))

View file

@ -4168,10 +4168,19 @@ _LOOPBACK_HOSTS = frozenset({"127.0.0.1", "::1", "localhost", "testclient"})
def _ws_client_is_allowed(ws: "WebSocket") -> bool:
"""Check if the WebSocket client IP is acceptable.
Loopback mode: only loopback clients allowed the legacy
Loopback bind: only loopback clients allowed the legacy
``?token=<_SESSION_TOKEN>`` path is the only auth we have, so we
don't want LAN hosts guessing tokens.
Explicit non-loopback bind (``--host 0.0.0.0``, ``--host ::``, or a
specific address such as a Tailscale/LAN IP, always with
``--insecure``): allow any peer. The operator explicitly opted into
non-loopback exposure, so the loopback-only peer restriction does not
apply. DNS-rebinding is still blocked by the Host/Origin guard in
:func:`_ws_host_origin_is_allowed`, which mirrors the HTTP layer and
requires the Host header to match the bound interface the same
defence ``_is_accepted_host`` applies to non-loopback HTTP requests.
Gated mode: any peer is allowed uvicorn's ``proxy_headers=True``
(enabled when the OAuth gate is active so cookies can pick up
``X-Forwarded-Proto``) rewrites ``ws.client.host`` to the
@ -4182,6 +4191,14 @@ def _ws_client_is_allowed(ws: "WebSocket") -> bool:
"""
if getattr(app.state, "auth_required", False):
return True
# Any explicit non-loopback bind (0.0.0.0, ::, or a specific LAN /
# Tailscale address) means the operator opted into non-loopback
# access via --insecure. The loopback-only peer gate only applies to
# an actual loopback bind; otherwise the WS handshake is rejected even
# though same-bind HTTP requests pass _is_accepted_host.
bound_host = (getattr(app.state, "bound_host", "") or "").strip().lower()
if bound_host and bound_host not in _LOOPBACK_HOSTS:
return True
client_host = ws.client.host if ws.client else ""
if not client_host:
return True

View file

@ -381,6 +381,7 @@ class SessionDB:
self._lock = threading.Lock()
self._write_count = 0
self._fts_enabled = False
try:
self._conn = sqlite3.connect(
str(self.db_path),
@ -389,7 +390,6 @@ class SessionDB:
# handles contention instead of sitting in SQLite's internal
# busy handler for up to 30s.
timeout=1.0,
# Autocommit mode: Python's default isolation_level=""
# auto-starts transactions on DML, which conflicts with our
# explicit BEGIN IMMEDIATE. None = we manage transactions
# ourselves.
@ -725,14 +725,44 @@ class SessionDB:
# FTS5 setup (separate because CREATE VIRTUAL TABLE can't be in executescript with IF NOT EXISTS reliably)
try:
cursor.execute("SELECT * FROM messages_fts LIMIT 0")
except sqlite3.OperationalError:
cursor.executescript(FTS_SQL)
self._fts_enabled = True
except sqlite3.OperationalError as exc:
if "no such table" not in str(exc).lower():
raise
try:
cursor.executescript(FTS_SQL)
self._fts_enabled = True
except sqlite3.OperationalError as fts_exc:
err = str(fts_exc).lower()
if "fts5" not in err and "no such module" not in err:
raise
logger.warning(
"SQLite FTS5 unavailable for %s; full-text session search "
"disabled. This usually means Hermes is running on an "
"unsupported install (e.g. a pip-installed or pip-managed "
"Python whose bundled SQLite lacks FTS5) rather than a "
"mainline install. Some features may be missing or behave "
"differently. Install the supported way: "
"https://hermes-agent.nousresearch.com (underlying error: %s)",
self.db_path,
fts_exc,
)
# Trigram FTS5 for CJK/substring search
try:
cursor.execute("SELECT * FROM messages_fts_trigram LIMIT 0")
except sqlite3.OperationalError:
cursor.executescript(FTS_TRIGRAM_SQL)
except sqlite3.OperationalError as exc:
if "no such table" not in str(exc).lower():
raise
try:
cursor.executescript(FTS_TRIGRAM_SQL)
except sqlite3.OperationalError as fts_exc:
err = str(fts_exc).lower()
if "fts5" not in err and "no such module" not in err:
raise
# Same FTS5-unavailable cause already warned about above for
# messages_fts; the trigram table is an additional CJK index,
# so just degrade silently here. CJK search falls back to LIKE.
self._conn.commit()
@ -947,6 +977,20 @@ class SessionDB:
)
self._execute_write(_do)
def update_session_model(self, session_id: str, model: str) -> None:
    """Overwrite the stored model of a session (e.g. after a mid-session switch).

    The ``model`` column is set unconditionally for the row whose ``id``
    equals *session_id*. The original note contrasts this with
    ``update_token_counts``, which is described as only filling in a NULL
    model via ``COALESCE(model, ?)``.

    Args:
        session_id: Primary key of the session row to update.
        model: New model name to store.
    """
    params = (model, session_id)

    def _apply(conn):
        conn.execute("UPDATE sessions SET model = ? WHERE id = ?", params)

    # The write goes through the instance's shared write helper.
    self._execute_write(_apply)
def update_token_counts(
self,
session_id: str,
@ -2333,6 +2377,9 @@ class SessionDB:
ignores ``sort``. The trigram CJK path honours ``sort`` like the main
FTS5 path.
"""
if not self._fts_enabled:
return []
if not query or not query.strip():
return []

View file

@ -255,7 +255,7 @@ gateway:
title: "**Titel:** {title}"
created: "**Geskep:** {timestamp}"
last_activity: "**Laaste aktiwiteit:** {timestamp}"
tokens: "**Tokens:** {tokens}"
tokens: "**Kumulatiewe API-tokens (elke oproep weer gestuur):** {tokens}"
agent_running: "**Agent loop:** {state}"
state_yes: "Ja ⚡"
state_no: "Nee"

View file

@ -255,7 +255,7 @@ gateway:
title: "**Titel:** {title}"
created: "**Erstellt:** {timestamp}"
last_activity: "**Letzte Aktivität:** {timestamp}"
tokens: "**Tokens:** {tokens}"
tokens: "**Kumulierte API-Tokens (bei jedem Aufruf erneut gesendet):** {tokens}"
agent_running: "**Agent läuft:** {state}"
state_yes: "Ja ⚡"
state_no: "Nein"

View file

@ -270,7 +270,7 @@ gateway:
title: "**Title:** {title}"
created: "**Created:** {timestamp}"
last_activity: "**Last Activity:** {timestamp}"
tokens: "**Tokens:** {tokens}"
tokens: "**Cumulative API tokens (re-sent each call):** {tokens}"
agent_running: "**Agent Running:** {state}"
state_yes: "Yes ⚡"
state_no: "No"

View file

@ -255,7 +255,7 @@ gateway:
title: "**Título:** {title}"
created: "**Creado:** {timestamp}"
last_activity: "**Última actividad:** {timestamp}"
tokens: "**Tokens:** {tokens}"
tokens: "**Tokens de API acumulados (reenviados en cada llamada):** {tokens}"
agent_running: "**Agente activo:** {state}"
state_yes: "Sí ⚡"
state_no: "No"

View file

@ -255,7 +255,7 @@ gateway:
title: "**Título:** {title}"
created: "**Criada:** {timestamp}"
last_activity: "**Última atividade:** {timestamp}"
tokens: "**Tokens:** {tokens}"
tokens: "**Tokens de API cumulativos (reenviados a cada chamada):** {tokens}"
agent_running: "**Agente em execução:** {state}"
state_yes: "Sim ⚡"
state_no: "Não"

View file

@ -32,14 +32,14 @@ Honcho provides AI-native cross-session user modeling. It learns who the user is
### Cloud (app.honcho.dev)
```bash
hermes honcho setup
hermes memory setup honcho
# select "cloud", paste API key from https://app.honcho.dev
```
### Self-hosted
```bash
hermes honcho setup
hermes memory setup honcho
# select "local", enter base URL (e.g. http://localhost:8000)
```

View file

@ -119,17 +119,20 @@ class BrowserUseBrowserProvider(BrowserProvider):
return "Browser Use"
def is_available(self) -> bool:
return self._get_config_or_none() is not None
return self._get_config_or_none(refresh_token=False) is not None
# ------------------------------------------------------------------
# Config resolution (direct API key OR managed Nous gateway)
# ------------------------------------------------------------------
def _get_config_or_none(self) -> Optional[Dict[str, Any]]:
def _get_config_or_none(self, *, refresh_token: bool = True) -> Optional[Dict[str, Any]]:
# Import here to avoid a hard dependency at module-import time —
# managed_tool_gateway pulls in the Nous auth stack which can be
# heavy and is not needed for direct-API-key users.
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.managed_tool_gateway import (
peek_nous_access_token,
resolve_managed_tool_gateway,
)
from tools.tool_backend_helpers import prefers_gateway
# Direct API key wins unless the user has explicitly opted into the
@ -142,7 +145,11 @@ class BrowserUseBrowserProvider(BrowserProvider):
"managed_mode": False,
}
managed = resolve_managed_tool_gateway("browser-use")
# Keep availability scans off the synchronous OAuth refresh path.
managed = resolve_managed_tool_gateway(
"browser-use",
token_reader=None if refresh_token else peek_nous_access_token,
)
if managed is None:
return None

View file

@ -2741,6 +2741,8 @@
// Ready/Block/Complete buttons feel like no-ops. See #26744.
const [patchErr, setPatchErr] = useState(null);
const [newComment, setNewComment] = useState("");
const [uploadBusy, setUploadBusy] = useState(false);
const [uploadErr, setUploadErr] = useState(null);
const [editing, setEditing] = useState(false);
// Home-channel notification toggles. homeChannels is the list of platforms
// the user has a /sethome on; each entry has a `subscribed` bool telling
@ -2789,6 +2791,49 @@
}).catch(function (e) { setErr(String(e.message || e)); });
};
// File upload uses raw fetch (not SDK.fetchJSON, which JSON-encodes)
// so the browser sets the multipart boundary. The bearer token is sent
// when present and credentials are same-origin.
//
// Files are uploaded one at a time. If a later file fails, the files that
// already succeeded are stored, so the task is re-loaded in that case too;
// otherwise the list would not show them until the next refresh.
const handleUpload = function (fileList) {
  const files = Array.prototype.slice.call(fileList || []);
  if (!files.length) return;
  setUploadBusy(true);
  setUploadErr(null);
  const token = window.__HERMES_SESSION_TOKEN__ || "";
  const headers = token ? { Authorization: "Bearer " + token } : {};
  const url = withBoard(`${API}/tasks/${encodeURIComponent(props.taskId)}/attachments`, boardSlug);
  const reportError = function (e) { setUploadErr(String(e.message || e)); };
  const refresh = function () {
    load();
    props.onRefresh();
  };
  // Number of files the server accepted so far.
  let uploaded = 0;
  // Upload sequentially so a partial failure leaves a clear state.
  let chain = Promise.resolve();
  files.forEach(function (f) {
    chain = chain.then(function () {
      const fd = new FormData();
      fd.append("file", f, f.name);
      return fetch(url, { method: "POST", headers: headers, credentials: "same-origin", body: fd })
        .then(function (resp) {
          if (!resp.ok) {
            return resp.text().then(function (txt) {
              throw new Error(parseApiErrorMessage(new Error(resp.status + ": " + txt)));
            });
          }
          uploaded += 1;
        });
    });
  });
  chain.then(refresh, function (e) {
    reportError(e);
    // Show whatever was stored before the failing file.
    if (uploaded > 0) refresh();
  }).catch(reportError).finally(function () {
    setUploadBusy(false);
  });
};
// Delete one attachment by id, then re-load the task and notify the parent.
// Failures are reported through the same error slot the upload flow uses.
const handleDeleteAttachment = function (attachmentId) {
  const endpoint = withBoard(`${API}/attachments/${attachmentId}`, boardSlug);
  const refresh = function () {
    load();
    props.onRefresh();
  };
  const reportError = function (e) {
    setUploadErr(String(e.message || e));
  };
  return SDK.fetchJSON(endpoint, { method: "DELETE" }).then(refresh).catch(reportError);
};
const doPatch = function (patch, opts) {
if (opts && opts.confirm && !window.confirm(opts.confirm)) {
return Promise.resolve();
@ -2946,6 +2991,10 @@
homeBusy: homeBusy,
onToggleHomeSub: toggleHomeSubscription,
onRefresh: props.onRefresh,
onUpload: handleUpload,
onDeleteAttachment: handleDeleteAttachment,
uploadBusy: uploadBusy,
uploadErr: uploadErr,
}) : null,
data ? h("div", { className: "hermes-kanban-drawer-comment-row" },
h(Input, {
@ -2968,11 +3017,118 @@
);
}
// Human-readable size: plain bytes below 1 KiB, otherwise KB or MB with one
// decimal. Non-numeric input is treated as 0.
function _fmtBytes(n) {
  const KB = 1024;
  const MB = KB * KB;
  const bytes = Number(n) || 0;
  if (bytes >= MB) return (bytes / MB).toFixed(1) + " MB";
  if (bytes >= KB) return (bytes / KB).toFixed(1) + " KB";
  return bytes + " B";
}
// Attachments section of the task drawer (#35338): an upload button backed
// by a hidden file input, an error line, and one row per attachment with a
// download trigger, its size, and a remove (×) button.
//
// Props: attachments, boardSlug, onUpload(fileList), onDelete(id),
// uploadBusy, uploadErr, i18n.
function AttachmentsSection(props) {
  const i18n = props.i18n;
  const atts = props.attachments || [];
  const fileRef = useRef(null);
  const [dlErr, setDlErr] = useState(null);

  // Hand a fetched blob to the browser as a download via a temporary
  // anchor; the object URL is released after ten seconds.
  function saveBlob(blob, filename) {
    const objUrl = URL.createObjectURL(blob);
    const link = document.createElement("a");
    link.href = objUrl;
    link.download = filename || "attachment";
    document.body.appendChild(link);
    link.click();
    document.body.removeChild(link);
    setTimeout(function () { URL.revokeObjectURL(objUrl); }, 10000);
  }

  // Download via authenticated fetch → blob → synthetic anchor click.
  // A plain <a href> cannot attach the bearer token, so the file is fetched
  // with it and the browser is given a blob URL instead.
  function downloadAttachment(a) {
    const token = window.__HERMES_SESSION_TOKEN__ || "";
    const headers = token ? { Authorization: "Bearer " + token } : {};
    const url = withBoard(`${API}/attachments/${a.id}`, props.boardSlug);
    setDlErr(null);
    fetch(url, { headers: headers, credentials: "same-origin" })
      .then(function (resp) {
        if (resp.ok) return resp.blob();
        return resp.text().then(function (txt) {
          throw new Error(parseApiErrorMessage(new Error(resp.status + ": " + txt)));
        });
      })
      .then(function (blob) { saveBlob(blob, a.filename); })
      .catch(function (e) { setDlErr(String(e.message || e)); });
  }

  function onFilesChosen(e) {
    if (props.onUpload) props.onUpload(e.target.files);
    // Reset so selecting the same file again re-triggers onChange.
    try { e.target.value = ""; } catch (_e) { /* ignore */ }
  }

  function confirmAndDelete(a) {
    const question = tx(i18n, "confirmRemoveAttachment", "Remove this attachment?");
    if (!window.confirm(question)) return;
    if (props.onDelete) props.onDelete(a.id);
  }

  function renderRow(a) {
    return h("div", {
      key: a.id,
      className: "flex items-center justify-between gap-2 py-1 text-sm",
    },
      h("button", {
        type: "button",
        className: "hermes-kanban-attachment-link truncate",
        title: a.filename,
        onClick: function () { downloadAttachment(a); },
      }, a.filename),
      h("span", { className: "text-xs text-muted-foreground whitespace-nowrap" },
        _fmtBytes(a.size)),
      h("button", {
        type: "button",
        className: "hermes-kanban-drawer-close",
        title: tx(i18n, "removeAttachment", "Remove attachment"),
        onClick: function () { confirmAndDelete(a); },
      }, "×")
    );
  }

  // An upload error takes precedence over a download error.
  const errText = props.uploadErr || dlErr;
  const buttonLabel = props.uploadBusy
    ? tx(i18n, "uploading", "Uploading…")
    : tx(i18n, "uploadFile", "Upload file");
  const listing = atts.length === 0
    ? h("div", { className: "text-xs text-muted-foreground" },
        tx(i18n, "noAttachments", "— no attachments —"))
    : atts.map(renderRow);

  return h("div", { className: "hermes-kanban-section" },
    h("div", { className: "hermes-kanban-section-head" },
      `${tx(i18n, "attachments", "Attachments")} (${atts.length})`),
    h("input", {
      ref: fileRef,
      type: "file",
      multiple: true,
      style: { display: "none" },
      onChange: onFilesChosen,
    }),
    h("div", { className: "flex items-center gap-2 mb-2" },
      h(Button, {
        size: "sm",
        variant: "outline",
        disabled: !!props.uploadBusy,
        onClick: function () { if (fileRef.current) fileRef.current.click(); },
      }, buttonLabel)
    ),
    errText
      ? h("div", { className: "text-xs text-destructive mb-2" }, errText)
      : null,
    listing
  );
}
function TaskDetail(props) {
const { t: i18n } = useI18n();
const t = props.data.task;
const comments = props.data.comments || [];
const events = props.data.events || [];
const attachments = props.data.attachments || [];
const links = props.data.links || { parents: [], children: [] };
return h("div", { className: "hermes-kanban-drawer-body" },
@ -3042,6 +3198,15 @@
h("div", { className: "hermes-kanban-section-head" }, tx(i18n, "result", "Result")),
h(MarkdownBlock, { source: t.result, enabled: props.renderMarkdown }),
) : null,
h(AttachmentsSection, {
attachments: attachments,
boardSlug: props.boardSlug,
onUpload: props.onUpload,
onDelete: props.onDeleteAttachment,
uploadBusy: props.uploadBusy,
uploadErr: props.uploadErr,
i18n: i18n,
}),
h("div", { className: "hermes-kanban-section" },
h("div", { className: "hermes-kanban-section-head" },
`${tx(i18n, "comments", "Comments")} (${comments.length})`),

View file

@ -334,6 +334,11 @@
.hermes-kanban-drawer {
width: min(var(--hermes-kanban-drawer-width, 640px), 92vw);
height: 100vh;
/* Dynamic viewport unit excludes the mobile browser's collapsing chrome
(URL/nav bars) so the drawer's bottom row stays reachable. Falls back to
100vh on browsers without dvh support. */
height: 100dvh;
max-height: 100dvh;
background: var(--color-card);
border-left: 1px solid var(--color-border);
display: flex;
@ -352,10 +357,23 @@
align-items: center;
justify-content: space-between;
padding: 0.6rem 0.8rem;
/* Honor the top safe-area inset (notch) so the task id / close button are
not clipped on mobile. */
padding-top: max(0.6rem, env(safe-area-inset-top));
border-bottom: 1px solid var(--color-border);
font-family: var(--font-mono, ui-monospace, monospace);
}
/* On mobile the dashboard shell renders a fixed top bar (min-h-14, hidden at
the lg breakpoint). The drawer is a body-level z-60 overlay starting at the
viewport top, so its header would sit behind that bar. Push the header down
by the bar height (3.5rem) plus the top safe-area inset. */
@media (max-width: 1023px) {
.hermes-kanban-drawer-head {
padding-top: calc(3.5rem + env(safe-area-inset-top));
}
}
.hermes-kanban-drawer-close {
appearance: none;
background: transparent;
@ -368,10 +386,33 @@
}
.hermes-kanban-drawer-close:hover { color: var(--color-foreground); }
/* Attachment download trigger styled as a link, rendered as a <button>
so the click handler can fetch with the session token (#35338). */
.hermes-kanban-attachment-link {
appearance: none;
background: transparent;
border: 0;
padding: 0;
margin: 0;
text-align: left;
color: var(--color-primary, #6ea8fe);
cursor: pointer;
text-decoration: none;
overflow: hidden;
text-overflow: ellipsis;
white-space: nowrap;
flex: 1;
}
.hermes-kanban-attachment-link:hover { text-decoration: underline; }
.hermes-kanban-drawer-body {
flex: 1;
overflow-y: auto;
padding: 0.9rem;
/* When no comment row is rendered (loading / error states), the scrolling
body is the bottom-most element — extend its bottom padding past the
mobile browser chrome so the last content stays readable. */
padding-bottom: max(0.9rem, calc(0.9rem + env(safe-area-inset-bottom)));
display: flex;
flex-direction: column;
gap: 0.85rem;
@ -530,6 +571,9 @@
display: flex;
gap: 0.4rem;
padding: 0.55rem 0.75rem;
/* Keep the comment input clear of the mobile browser nav bar / home
indicator by extending the bottom padding with the safe-area inset. */
padding-bottom: max(0.55rem, calc(0.55rem + env(safe-area-inset-bottom)));
border-top: 1px solid var(--color-border);
background: color-mix(in srgb, var(--color-card) 90%, transparent);
}

View file

@ -43,9 +43,11 @@ import os
import sqlite3
import time
from dataclasses import asdict
from pathlib import Path
from typing import Any, Optional
from fastapi import APIRouter, HTTPException, Query, WebSocket, WebSocketDisconnect, status as http_status
from fastapi import APIRouter, File, Form, HTTPException, Query, UploadFile, WebSocket, WebSocketDisconnect, status as http_status
from fastapi.responses import FileResponse
from pydantic import BaseModel, Field
from hermes_cli import kanban_db
@ -186,6 +188,21 @@ def _comment_dict(c: kanban_db.Comment) -> dict[str, Any]:
}
def _attachment_dict(a: kanban_db.Attachment) -> dict[str, Any]:
    """Convert an Attachment record into a plain dict for the API response.

    The attributes are copied one-to-one, in a fixed order. ``stored_path``
    is described upstream as the absolute on-disk path workers read, while
    the UI downloads by ``id``.
    """
    fields = (
        "id",
        "task_id",
        "filename",
        "content_type",
        "size",
        "uploaded_by",
        "stored_path",
        "created_at",
    )
    return {name: getattr(a, name) for name in fields}
def _run_dict(r: kanban_db.Run) -> dict[str, Any]:
"""Serialise a Run for the drawer's Run history section."""
return {
@ -531,6 +548,7 @@ def get_task(
"task": task_d,
"comments": [_comment_dict(c) for c in kanban_db.list_comments(conn, task_id)],
"events": [_event_dict(e) for e in kanban_db.list_events(conn, task_id)],
"attachments": [_attachment_dict(a) for a in kanban_db.list_attachments(conn, task_id)],
"links": _links_for(conn, task_id),
"runs": [
_run_dict(r)
@ -609,6 +627,165 @@ def create_task(payload: CreateTaskBody, board: Optional[str] = Query(None)):
conn.close()
# ---------------------------------------------------------------------------
# Attachments — upload / list / download / delete (#35338)
# ---------------------------------------------------------------------------
# Cap a single upload so a runaway request can't fill the disk. 25 MB
# comfortably covers PDFs, images, and source docs — the kanban use case.
_MAX_ATTACHMENT_BYTES = 25 * 1024 * 1024
def _safe_attachment_name(raw: str) -> str:
    """Reduce a client-supplied filename to a safe basename.

    Steps:
      1. Keep only the last path component, treating both ``/`` and ``\\``
         as separators, so ``../../etc/passwd`` or ``C:\\x`` collapses to
         its leaf.
      2. Drop non-printable characters (NULs, newlines, other control
         characters).
      3. Strip surrounding whitespace and leading dots so no dotfile is
         produced.
      4. Cap the result at 200 characters. When the name has a short
         extension it is kept and the stem is shortened instead, so a long
         ``….pdf`` upload stays a ``.pdf``.

    Args:
        raw: Filename as sent by the client; may be empty or ``None``.

    Returns:
        A non-empty name of at most 200 characters with no path separators.

    Raises:
        HTTPException: 400 when nothing usable remains after sanitising.
    """
    max_len = 200
    max_ext_len = 16  # longer "extensions" are treated as part of the name

    leaf = (raw or "").replace("\\", "/").rsplit("/", 1)[-1]
    # str.isprintable() is already False for NUL and other control chars.
    cleaned = "".join(ch for ch in leaf if ch.isprintable()).strip()
    cleaned = cleaned.lstrip(".").strip()
    if not cleaned:
        raise HTTPException(status_code=400, detail="invalid attachment filename")
    if len(cleaned) <= max_len:
        return cleaned

    stem, dot, ext = cleaned.rpartition(".")
    if dot and stem and 0 < len(ext) <= max_ext_len:
        suffix = dot + ext
        # rstrip so a cut that lands on a space does not yield "name .pdf".
        return stem[: max_len - len(suffix)].rstrip() + suffix
    return cleaned[:max_len].rstrip()
@router.get("/tasks/{task_id}/attachments")
def list_task_attachments(task_id: str, board: Optional[str] = Query(None)):
    """Return the serialised attachments of a task.

    Responds 404 when the task does not exist on the resolved board. The
    connection is always closed before returning.
    """
    board = _resolve_board(board)
    conn = _conn(board=board)
    try:
        task = kanban_db.get_task(conn, task_id)
        if task is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")
        rows = kanban_db.list_attachments(conn, task_id)
        return {"attachments": [_attachment_dict(row) for row in rows]}
    finally:
        conn.close()
@router.post("/tasks/{task_id}/attachments")
async def upload_task_attachment(
    task_id: str,
    file: UploadFile = File(...),
    board: Optional[str] = Query(None),
    uploaded_by: Optional[str] = Form(None),
):
    """Store an uploaded file for a task and record its metadata.

    The blob is written under ``kanban_db.task_attachments_dir(task_id,
    board=board)`` using a sanitised, collision-resolved name, then a
    metadata row is added via ``kanban_db.add_attachment``.

    The blob and its row are kept consistent: if the write fails, the size
    cap is exceeded, or the metadata insert raises, the (partial) file is
    removed again so no orphaned blob is left behind.

    Returns:
        ``{"attachment": <serialised attachment or None>}``.

    Raises:
        HTTPException: 404 for an unknown task, 400 for an unusable filename
            or a ``ValueError`` from the DB layer, 413 when the upload
            exceeds ``_MAX_ATTACHMENT_BYTES``, 500 when the file cannot be
            written.
    """
    board = _resolve_board(board)
    conn = _conn(board=board)
    try:
        if kanban_db.get_task(conn, task_id) is None:
            raise HTTPException(status_code=404, detail=f"task {task_id} not found")

        safe_name = _safe_attachment_name(file.filename or "")

        dest_dir = kanban_db.task_attachments_dir(task_id, board=board)
        dest_dir.mkdir(parents=True, exist_ok=True)

        # Resolve name collisions: foo.pdf → foo (1).pdf, foo (2).pdf, …
        stem, dot, ext = safe_name.partition(".")
        candidate = safe_name
        n = 1
        while (dest_dir / candidate).exists():
            candidate = f"{stem} ({n}){dot}{ext}"
            n += 1
        dest_path = dest_dir / candidate

        total = 0
        recorded = False
        try:
            try:
                # Stream in 1 MiB chunks with a hard cap so a huge upload
                # cannot fill the disk.
                with open(dest_path, "wb") as out:
                    while True:
                        chunk = await file.read(1024 * 1024)
                        if not chunk:
                            break
                        total += len(chunk)
                        if total > _MAX_ATTACHMENT_BYTES:
                            raise HTTPException(
                                status_code=413,
                                detail=(
                                    f"attachment exceeds {_MAX_ATTACHMENT_BYTES // (1024 * 1024)} MB limit"
                                ),
                            )
                        out.write(chunk)
            except OSError as exc:
                raise HTTPException(
                    status_code=500, detail=f"failed to store attachment: {exc}"
                ) from exc

            att_id = kanban_db.add_attachment(
                conn,
                task_id,
                filename=candidate,
                stored_path=str(dest_path.resolve()),
                content_type=file.content_type,
                size=total,
                uploaded_by=(uploaded_by or "dashboard"),
            )
            recorded = True
        finally:
            if not recorded:
                # Runs after the ``with`` has closed the file, so the unlink
                # never targets an open handle.
                try:
                    dest_path.unlink(missing_ok=True)
                except OSError:
                    pass  # best effort; the original error is what matters

        att = kanban_db.get_attachment(conn, att_id)
        return {"attachment": _attachment_dict(att) if att else None}
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e)) from e
    finally:
        conn.close()
@router.get("/attachments/{attachment_id}")
def download_attachment(attachment_id: int, board: Optional[str] = Query(None)):
    """Serve the stored file of one attachment.

    Responds 404 when the attachment row is missing, when its resolved path
    is not inside the board's attachments root, or when the file is no
    longer on disk. The connection is always closed before returning.
    """
    board = _resolve_board(board)
    conn = _conn(board=board)
    try:
        att = kanban_db.get_attachment(conn, attachment_id)
        if att is None:
            raise HTTPException(status_code=404, detail="attachment not found")

        # Defence in depth against a tampered DB row: the blob must still
        # resolve to a location under the board's attachments root.
        root = kanban_db.attachments_root(board=board).resolve()
        try:
            stored = Path(att.stored_path).resolve()
            stored.relative_to(root)
        except (ValueError, OSError):
            raise HTTPException(status_code=404, detail="attachment file unavailable")

        if not stored.is_file():
            raise HTTPException(status_code=404, detail="attachment file missing on disk")

        media_type = att.content_type or "application/octet-stream"
        return FileResponse(
            path=str(stored),
            filename=att.filename,
            media_type=media_type,
        )
    finally:
        conn.close()
@router.delete("/attachments/{attachment_id}")
def remove_attachment(attachment_id: int, board: Optional[str] = Query(None)):
    """Delete one attachment via ``kanban_db.delete_attachment``.

    Responds 404 when the DB layer reports no such attachment (returns
    ``None``); otherwise returns ``{"ok": True, "id": attachment_id}``.
    The connection is always closed before returning.
    """
    board = _resolve_board(board)
    conn = _conn(board=board)
    try:
        removed = kanban_db.delete_attachment(conn, attachment_id)
        if removed is not None:
            return {"ok": True, "id": attachment_id}
        raise HTTPException(status_code=404, detail="attachment not found")
    finally:
        conn.close()
# ---------------------------------------------------------------------------
# PATCH /tasks/:id (status / assignee / priority / title / body)
# ---------------------------------------------------------------------------

View file

@ -633,7 +633,8 @@ class HindsightMemoryProvider(MemoryProvider):
except Exception:
pass
existing.update(values)
config_path.write_text(json.dumps(existing, indent=2))
from utils import atomic_json_write
atomic_json_write(config_path, existing, mode=0o600)
def post_setup(self, hermes_home: str, config: dict) -> None:
"""Custom setup wizard — installs only the deps needed for the selected mode."""

View file

@ -12,8 +12,8 @@ AI-native cross-session user modeling with multi-pass dialectic reasoning, sessi
## Setup
```bash
hermes honcho setup # full interactive wizard (cloud or local)
hermes memory setup # generic picker, also works
hermes memory setup honcho # configure Honcho directly (works on a fresh install)
hermes memory setup # generic picker, choose Honcho from the list
```
Or manually:
@ -22,6 +22,10 @@ hermes config set memory.provider honcho
echo "HONCHO_API_KEY=***" >> ~/.hermes/.env
```
> `hermes honcho setup` also works, but only **after** Honcho is the active
> memory provider — the `honcho` subcommand is registered for the active
> provider only. On a fresh install, use `hermes memory setup honcho`.
## Architecture Overview
### Two-Layer Context Injection
@ -109,7 +113,7 @@ Config is read from the first file that exists:
| 2 | `~/.hermes/honcho.json` | Default profile (shared host blocks) |
| 3 | `~/.honcho/config.json` | Global (cross-app interop) |
Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>`.
Host key is derived from the active Hermes profile: `hermes` (default) or `hermes_<profile>`.
For every key, resolution order is: **host block > root > env var > default**.
@ -154,7 +158,7 @@ In gateway deployments (Telegram, Discord, Slack, etc.) each user arrives with a
**Host vs root semantics.** All three keys are accepted at both root and `hosts.<host>` levels. Host-level wins. For maps and prefixes, host-level *replaces* the root value as a whole (not merge), so a host can intentionally own its identity universe or wipe it with `userPeerAliases: {}` / `runtimePeerPrefix: ""`.
**Deployment shapes** (`hermes honcho setup` asks one prompt to set these):
**Deployment shapes** (`hermes memory setup honcho` asks one prompt to set these):
- **Single-operator** — `pinUserPeer: true`. All gateway users → `peerName`. Recommended for personal use where you connect Hermes to your own Telegram/Discord/etc.
- **Multi-user gateway** — `pinUserPeer: false`, optional `runtimePeerPrefix`. Each runtime user → own peer. Recommended for bots serving many humans.
@ -225,7 +229,7 @@ Multiple Hermes profiles can share one workspace while maintaining separate AI i
"recallMode": "hybrid",
"sessionStrategy": "per-directory"
},
"hermes.coder": {
"hermes_coder": {
"aiPeer": "coder",
"recallMode": "tools",
"sessionStrategy": "per-repo"
@ -236,7 +240,7 @@ Multiple Hermes profiles can share one workspace while maintaining separate AI i
Both profiles see the same user (`yourname`) in the same shared environment (`hermes`), but each AI peer builds its own observations, conclusions, and behavior patterns. The coder's memory stays code-oriented; the main agent's stays broad.
Host key is derived from the active Hermes profile: `hermes` (default) or `hermes.<profile>` (e.g. `hermes -p coder` → host key `hermes.coder`).
Host key is derived from the active Hermes profile: `hermes` (default) or `hermes_<profile>` (e.g. `hermes -p coder` -> host key `hermes_coder`). Older `hermes.<profile>` host blocks are still read for compatibility and are migrated when the CLI writes profile-scoped Honcho config.
### Dialectic & Reasoning
@ -307,7 +311,8 @@ Presets:
| Command | Description |
|---------|-------------|
| `hermes honcho setup` | Full interactive setup wizard |
| `hermes memory setup honcho` | Configure Honcho directly — works on a fresh install |
| `hermes honcho setup` | Interactive setup wizard (only registered once Honcho is the active provider; redirects to `hermes memory setup`) |
| `hermes honcho status` | Show resolved config for active profile |
| `hermes honcho enable` / `disable` | Toggle Honcho for active profile |
| `hermes honcho mode <mode>` | Change recall or observation mode |
@ -344,7 +349,7 @@ Presets:
"dialecticMaxChars": 600,
"saveMessages": true
},
"hermes.coder": {
"hermes_coder": {
"enabled": true,
"aiPeer": "coder",
"sessionStrategy": "per-repo",

View file

@ -249,6 +249,7 @@ class HonchoMemoryProvider(MemoryProvider):
def save_config(self, values, hermes_home):
"""Write config to $HERMES_HOME/honcho.json (Honcho SDK native format)."""
import json
import os
from pathlib import Path
config_path = Path(hermes_home) / "honcho.json"
existing = {}
@ -258,7 +259,8 @@ class HonchoMemoryProvider(MemoryProvider):
except Exception:
pass
existing.update(values)
config_path.write_text(json.dumps(existing, indent=2))
from utils import atomic_json_write
atomic_json_write(config_path, existing, mode=0o600)
def get_config_schema(self):
return [

View file

@ -11,7 +11,7 @@ import sys
from pathlib import Path
from hermes_constants import get_hermes_home
from plugins.memory.honcho.client import resolve_active_host, resolve_config_path, HOST
from plugins.memory.honcho.client import _host_block, profile_host_key, resolve_active_host, resolve_config_path, HOST
from hermes_cli.config import cfg_get
@ -36,7 +36,7 @@ def clone_honcho_for_profile(profile_name: str) -> bool:
if not default_block and not has_key:
return False
new_host = f"{HOST}.{profile_name}"
new_host = profile_host_key(profile_name)
if new_host in hosts:
return False # already exists
@ -192,7 +192,7 @@ def cmd_sync(args) -> None:
if p.name == "default":
continue
if clone_honcho_for_profile(p.name):
print(f" + {p.name} -> hermes.{p.name}")
print(f" + {p.name} -> {profile_host_key(p.name)}")
created += 1
else:
skipped += 1
@ -243,7 +243,7 @@ def _host_key() -> str:
if _profile_override:
if _profile_override in {"default", "custom"}:
return HOST
return f"{HOST}.{_profile_override}"
return profile_host_key(_profile_override)
return resolve_active_host()
@ -275,10 +275,8 @@ def _read_config() -> dict:
def _write_config(cfg: dict, path: Path | None = None) -> None:
    """Persist *cfg* as JSON at *path*, atomically and owner-only.

    Args:
        cfg: The full Honcho config mapping to write.
        path: Destination file. Falls back to ``_local_config_path()`` when
            omitted or falsy.

    The parent directory is created if needed. The file is written exactly
    once, through ``utils.atomic_json_write`` with mode ``0o600``: the config
    can hold API keys / JWTs, so it must never be written non-atomically or
    with default permissions first.
    """
    path = path or _local_config_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    # Imported lazily, matching the other config writers in this change.
    from utils import atomic_json_write

    atomic_json_write(path, cfg, mode=0o600)
def _resolve_api_key(cfg: dict) -> str:
@ -292,7 +290,7 @@ def _resolve_api_key(cfg: dict) -> str:
config shapes, e.g. ``localhost:8000``) still pass the Honcho SDK
will reject them itself with a clearer error than ours.
"""
host_key = ((cfg.get("hosts") or {}).get(_host_key()) or {}).get("apiKey")
host_key = _host_block(cfg, _host_key()).get("apiKey")
key = host_key or cfg.get("apiKey", "") or os.environ.get("HONCHO_API_KEY", "")
if not key:
base_url = cfg.get("baseUrl") or cfg.get("base_url") or os.environ.get("HONCHO_BASE_URL", "")
@ -462,21 +460,58 @@ def cmd_setup(args) -> None:
cfg.pop("base_url", None)
if is_local:
# --- Local: ask for base URL, skip or clear API key ---
# --- Local: ask for base URL, optionally accept a JWT for auth ---
current_url = cfg.get("baseUrl") or ""
new_url = _prompt("Base URL", default=current_url or "http://localhost:8000")
if new_url:
cfg["baseUrl"] = new_url
# For local no-auth, the SDK must not send an API key.
# We keep the key in config (for cloud switching later) but
# the client should skip auth when baseUrl is local.
current_key = cfg.get("apiKey", "")
if current_key:
print(f"\n API key present in config (kept for cloud/hybrid use).")
print(" Local connections will skip auth automatically.")
# Self-hosted Honcho can run with AUTH_USE_AUTH=true and an
# AUTH_JWT_SECRET on the server side. In that case clients must
# send a JWT signed with that secret as the bearer token (the
# Honcho SDK takes it via ``api_key=``). Cloud users got prompted
# for a key already; the local path historically skipped this and
# forced users to disable auth on the server. Offer the prompt
# here too. We store it under the host block (not the top-level
# apiKey) so ``get_honcho_client`` recognises it as an explicit
# local auth opt-in (see ``_host_has_key`` in client.py) and
# cloud/hybrid switching is unaffected.
current_host_key = hermes_host.get("apiKey", "")
masked = (
f"...{current_host_key[-8:]}"
if len(current_host_key) > 8
else ("set" if current_host_key else "not set")
)
print(
"\n Local Honcho auth (JWT signed with the server's "
"AUTH_JWT_SECRET)."
)
print(
" Leave blank if your server runs with AUTH_USE_AUTH=false. "
f"Current: {masked}"
)
new_local_key = _prompt(
"Local JWT / bearer token (blank to skip / keep current)",
secret=True,
)
if new_local_key:
hermes_host["apiKey"] = new_local_key
elif current_host_key:
print(" Keeping existing local JWT.")
else:
print("\n No API key set. Local no-auth ready.")
# Surface the top-level key situation for transparency.
top_key = cfg.get("apiKey", "")
if top_key:
print(
"\n Top-level API key present in config (kept for "
"cloud/hybrid use)."
)
print(
" Local connections will skip auth automatically "
"until a local JWT is set above."
)
else:
print("\n No local JWT set. Local no-auth ready.")
else:
# --- Cloud: set default base URL, require API key ---
cfg.pop("baseUrl", None) # cloud uses SDK default

View file

@ -32,6 +32,24 @@ logger = logging.getLogger(__name__)
HOST = "hermes"
def profile_host_key(profile: str | None) -> str:
    """Map a Hermes profile name to the Honcho host key it should use.

    An empty/``None`` profile, ``"default"`` and ``"custom"`` all map to the
    bare ``HOST`` key. Any other profile maps to ``f"{HOST}_<suffix>"``,
    where the suffix is the profile name with every character that is not
    alphanumeric, ``_`` or ``-`` replaced by ``_`` and with leading/trailing
    underscores trimmed. If nothing survives trimming, the literal suffix
    ``profile`` is used.
    """
    uses_base_host = not profile or profile in {"default", "custom"}
    if uses_base_host:
        return HOST

    def _safe(ch: str) -> str:
        # Keep key-safe characters, neutralise everything else.
        return ch if ch.isalnum() or ch in "_-" else "_"

    suffix = "".join(map(_safe, profile)).strip("_")
    if not suffix:
        suffix = "profile"
    return f"{HOST}_{suffix}"
def _host_block(raw: dict, host: str) -> dict:
    """Fetch the ``hosts`` entry for *host*, falling back to the legacy key.

    Profile host keys now use the ``<HOST>_<profile>`` form. When no
    non-empty entry exists under that key, the older dot form
    ``<HOST>.<profile>`` is consulted instead. Hosts that do not carry the
    ``<HOST>_`` prefix never fall back.

    Returns whatever is stored for the host, or ``{}`` when nothing is found.
    """
    hosts = raw.get("hosts") or {}
    current = hosts.get(host, {})
    if current:
        return current

    underscore_prefix = f"{HOST}_"
    if not host.startswith(underscore_prefix):
        return current

    # Rebuild the pre-migration key: same profile part, dot separator.
    profile_part = host[len(underscore_prefix):]
    return hosts.get(f"{HOST}.{profile_part}", {})
def resolve_active_host() -> str:
"""Derive the Honcho host key from the active Hermes profile.
@ -47,8 +65,7 @@ def resolve_active_host() -> str:
try:
from hermes_cli.profiles import get_active_profile_name
profile = get_active_profile_name()
if profile and profile not in {"default", "custom"}:
return f"{HOST}.{profile}"
return profile_host_key(profile)
except Exception:
pass
return HOST
@ -406,7 +423,7 @@ class HonchoClientConfig:
logger.warning("Failed to read %s: %s, falling back to env", path, e)
return cls.from_env(host=resolved_host)
host_block = (raw.get("hosts") or {}).get(resolved_host, {})
host_block = _host_block(raw, resolved_host)
# A hosts.hermes block or explicit enabled flag means the user
# intentionally configured Honcho for this host.
_explicitly_configured = bool(host_block) or raw.get("enabled") is True
@ -811,7 +828,10 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
or "::1" in resolved_base_url
)
if _is_local:
# Check if the host block has its own apiKey (explicit local auth)
# Check if the host block has its own apiKey (explicit local auth).
# Auth-skipping is loopback-only: a stored key is likely a cloud key
# that would break a no-auth local server, so we substitute the SDK's
# required-non-empty placeholder unless the host block opts in.
_raw = config.raw or {}
_host_block = (_raw.get("hosts") or {}).get(config.host, {})
_host_has_key = bool(_host_block.get("apiKey"))
@ -819,6 +839,18 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
else:
effective_api_key = config.api_key
# The Honcho SDK's route builders (e.g. routes.workspaces()) already
# include the version prefix (e.g. "/v3/workspaces"). When a user-supplied
# base_url already ends in a version segment (e.g.
# "http://localhost:38000/v3", "https://honcho.my.ts.net/v3"), concatenating
# the two produces "/v3/v3/workspaces" → 404 on every call. This is a pure
# routing concern independent of host, so strip a trailing version segment
# from ANY base_url — loopback, LAN, custom domain, or cloud alike. The
# SDK then appends its own versioned paths correctly.
if resolved_base_url:
import re as _re
resolved_base_url = _re.sub(r"/v\d+/*$", "", resolved_base_url).rstrip("/")
kwargs: dict = {
"workspace_id": config.workspace_id,
"api_key": effective_api_key,

View file

@ -155,7 +155,8 @@ class Mem0MemoryProvider(MemoryProvider):
except Exception:
pass
existing.update(values)
config_path.write_text(json.dumps(existing, indent=2))
from utils import atomic_json_write
atomic_json_write(config_path, existing, mode=0o600)
def get_config_schema(self):
return [

View file

@ -152,7 +152,8 @@ def _save_supermemory_config(values: dict, hermes_home: str) -> None:
except Exception:
existing = {}
existing.update(values)
config_path.write_text(json.dumps(existing, indent=2, sort_keys=True) + "\n", encoding="utf-8")
from utils import atomic_json_write
atomic_json_write(config_path, existing, mode=0o600, sort_keys=True)
def _detect_category(text: str) -> str:

View file

@ -6093,16 +6093,17 @@ def _apply_yaml_config(yaml_cfg: dict, discord_cfg: dict) -> dict | None:
``gateway/config.py::load_gateway_config()`` before this migration.
The DiscordAdapter reads its runtime configuration via ``os.getenv()``
throughout the connect / handle code paths (``DISCORD_REQUIRE_MENTION``,
``DISCORD_FREE_RESPONSE_CHANNELS``, ``DISCORD_AUTO_THREAD``,
``DISCORD_REACTIONS``, ``DISCORD_IGNORED_CHANNELS``,
``DISCORD_ALLOWED_CHANNELS``, ``DISCORD_NO_THREAD_CHANNELS``,
``DISCORD_HISTORY_BACKFILL``, ``DISCORD_HISTORY_BACKFILL_LIMIT``,
``DISCORD_ALLOW_MENTION_*``, ``DISCORD_REPLY_TO_MODE``,
``DISCORD_THREAD_REQUIRE_MENTION``). Rather than rewrite ~50 call sites
inside the adapter to read from ``PlatformConfig.extra`` instead, this
hook keeps the existing env-driven model and merely owns the
YAMLenv translation here, next to the adapter that consumes it.
throughout the connect / handle code paths (``DISCORD_ALLOWED_USERS``,
``DISCORD_REQUIRE_MENTION``, ``DISCORD_FREE_RESPONSE_CHANNELS``,
``DISCORD_AUTO_THREAD``, ``DISCORD_REACTIONS``,
``DISCORD_IGNORED_CHANNELS``, ``DISCORD_ALLOWED_CHANNELS``,
``DISCORD_NO_THREAD_CHANNELS``, ``DISCORD_HISTORY_BACKFILL``,
``DISCORD_HISTORY_BACKFILL_LIMIT``, ``DISCORD_ALLOW_MENTION_*``,
``DISCORD_REPLY_TO_MODE``, ``DISCORD_THREAD_REQUIRE_MENTION``).
Rather than rewrite ~50 call sites inside the adapter to read from
``PlatformConfig.extra`` instead, this hook keeps the existing
env-driven model and merely owns the YAML-to-env translation here, next
to the adapter that consumes it.
Env vars take precedence over YAML — every assignment is guarded by
``not os.getenv(...)`` so explicit env vars survive a config.yaml
@ -6113,6 +6114,22 @@ def _apply_yaml_config(yaml_cfg: dict, discord_cfg: dict) -> dict | None:
os.environ["DISCORD_REQUIRE_MENTION"] = str(discord_cfg["require_mention"]).lower()
if "thread_require_mention" in discord_cfg and not os.getenv("DISCORD_THREAD_REQUIRE_MENTION"):
os.environ["DISCORD_THREAD_REQUIRE_MENTION"] = str(discord_cfg["thread_require_mention"]).lower()
platforms_cfg = yaml_cfg.get("platforms")
platform_extra_cfg = {}
if isinstance(platforms_cfg, dict):
discord_platform_cfg = platforms_cfg.get("discord")
if isinstance(discord_platform_cfg, dict):
candidate_extra = discord_platform_cfg.get("extra")
if isinstance(candidate_extra, dict):
platform_extra_cfg = candidate_extra
allowed_users_cfg = (
discord_cfg["allow_from"] if "allow_from" in discord_cfg
else platform_extra_cfg.get("allow_from")
)
if allowed_users_cfg is not None and not os.getenv("DISCORD_ALLOWED_USERS"):
if isinstance(allowed_users_cfg, list):
allowed_users_cfg = ",".join(str(v) for v in allowed_users_cfg)
os.environ["DISCORD_ALLOWED_USERS"] = str(allowed_users_cfg)
frc = discord_cfg.get("free_response_channels")
if frc is not None and not os.getenv("DISCORD_FREE_RESPONSE_CHANNELS"):
if isinstance(frc, list):

View file

@ -146,16 +146,16 @@ def _get_firecrawl_gateway_url() -> str:
def _is_tool_gateway_ready() -> bool:
    """Return True when gateway URL + Nous Subscriber token are available.

    Reads ``_peek_nous_access_token`` and ``resolve_managed_tool_gateway``
    via :mod:`tools.web_tools` rather than direct imports, so unit tests
    that ``patch("tools.web_tools._peek_nous_access_token", ...)`` see
    their patches honored. The names are re-exported on
    :mod:`tools.web_tools` for exactly this reason.
    """
    import tools.web_tools as _wt

    # Single call with the peek-style token reader; readiness is simply
    # "the resolver returned something".
    return _wt.resolve_managed_tool_gateway(
        "firecrawl", token_reader=_wt._peek_nous_access_token
    ) is not None

View file

@ -87,7 +87,7 @@ edge-tts = ["edge-tts==7.2.7"]
modal = ["modal==1.3.4"]
daytona = ["daytona==0.155.0"]
hindsight = ["hindsight-client==0.6.1"]
dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "ty==0.0.21", "ruff==0.15.10", "setuptools==82.0.1"]
dev = ["debugpy==1.8.20", "pytest==9.0.2", "pytest-asyncio==1.3.0", "pytest-timeout==2.4.0", "mcp==1.26.0", "starlette==1.0.1", "ty==0.0.21", "ruff==0.15.10", "setuptools==82.0.1"] # starlette: CVE-2026-48710
messaging = ["python-telegram-bot[webhooks]==22.6", "discord.py[voice]==2.7.1", "aiohttp==3.13.3", "brotlicffi==1.2.0.1", "slack-bolt==1.27.0", "slack-sdk==3.40.1", "qrcode==7.4.2"]
cron = [] # croniter is now a core dependency; this extra kept for back-compat
slack = ["slack-bolt==1.27.0", "slack-sdk==3.40.1", "aiohttp==3.13.3"]
@ -114,14 +114,21 @@ pty = [
# without pulling in extra packages.
]
honcho = ["honcho-ai==2.0.1"]
mcp = ["mcp==1.26.0"]
# CVE-2026-48710 (BadHost): Starlette is pulled transitively by mcp's
# sse-starlette / HTTP-SSE stack (and by fastapi in the `web` extra). Before
# 1.0.1, a malformed Host header makes `request.url.path` desync from the path
# the ASGI router actually dispatched, so middleware/endpoints that gate on
# `request.url` can be bypassed. We pin a patched Starlette directly in every
# extra that exposes a Starlette-backed server surface so pip/uv can't resolve
# a vulnerable pre-1.0.1 transitive. Bump in lockstep with uv.lock.
mcp = ["mcp==1.26.0", "starlette==1.0.1"] # starlette: CVE-2026-48710
homeassistant = ["aiohttp==3.13.3"]
sms = ["aiohttp==3.13.3"]
# Computer use — macOS background desktop control via cua-driver (MCP stdio).
# The cua-driver binary itself is installed via `hermes tools` post-setup
# (curl install script); this extra just pins the MCP client used to talk
# to it, which is already provided by the `mcp` extra.
computer-use = ["mcp==1.26.0"]
computer-use = ["mcp==1.26.0", "starlette==1.0.1"] # starlette: CVE-2026-48710
acp = ["agent-client-protocol==0.9.0"]
# mistral: Voxtral STT + TTS. Pinned to an exact verified-clean version.
# The `mistralai` PyPI project was quarantined 2026-05-12 after the malicious
@ -174,7 +181,9 @@ youtube = [
"youtube-transcript-api==1.2.4",
]
# `hermes dashboard` (localhost SPA + API). Not in core to keep the default install lean.
web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0"]
# starlette==1.0.1 pinned for CVE-2026-48710 (BadHost) — fastapi pulls Starlette
# transitively and pre-1.0.1 is the vulnerable range. See the mcp extra above.
web = ["fastapi==0.133.1", "uvicorn[standard]==0.41.0", "starlette==1.0.1"]
all = [
# Policy (2026-05-12): `[all]` includes only extras that genuinely
# CAN'T be lazy-installed via `tools/lazy_deps.py` — i.e. things every
@ -216,7 +225,7 @@ hermes-agent = "run_agent:main"
hermes-acp = "acp_adapter.entry:main"
[tool.setuptools]
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils"]
py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajectory_compressor", "toolset_distributions", "cli", "hermes_bootstrap", "hermes_constants", "hermes_state", "hermes_time", "hermes_logging", "utils", "mcp_serve"]
[tool.setuptools.package-data]
hermes_cli = ["web_dist/**/*", "tui_dist/**/*", "scripts/install.sh", "scripts/install.ps1"]

View file

@ -2195,6 +2195,126 @@ class AIAgent:
lines.append(f" • … and {remaining} more")
return "\n".join(lines)
def _turn_completion_explainer_enabled(self) -> bool:
"""Check whether the end-of-turn completion explainer footer is on.
Config path: ``display.turn_completion_explainer`` (bool, default
True). ``HERMES_TURN_COMPLETION_EXPLAINER`` env var overrides
config. Exposed as a method so tests can patch a single seam,
mirroring ``_file_mutation_verifier_enabled``.
"""
try:
import os as _os
env = _os.environ.get("HERMES_TURN_COMPLETION_EXPLAINER")
if env is not None:
return env.strip().lower() not in {"0", "false", "no", "off"}
# Read from the persisted config.yaml so gateway and CLI share
# the same setting. Import lazily to avoid a startup-time cycle.
try:
from hermes_cli.config import load_config as _load_config
_cfg = _load_config() or {}
except Exception:
_cfg = {}
_display = _cfg.get("display") if isinstance(_cfg, dict) else None
if isinstance(_display, dict) and "turn_completion_explainer" in _display:
return bool(_display.get("turn_completion_explainer"))
except Exception:
pass
return True # safe default: explainer on
@staticmethod
def _format_turn_completion_explanation(turn_exit_reason: str) -> str:
"""Render a user-facing explanation for an abnormal turn ending.
Maps the internal ``turn_exit_reason`` to a short, actionable
message so a turn that produced no usable assistant reply (empty
content after retries, a partial/truncated stream, a still-pending
tool result, or an iteration/budget limit) is never silent from
the UI's perspective — the symptom users report in #34452.
Returns an empty string for reasons that are NOT abnormal (e.g.
a normal ``text_response(...)`` exit), so callers can concatenate
or substitute unconditionally without warning on healthy turns
like a terse ``Done.``.
"""
if not turn_exit_reason:
return ""
reason = str(turn_exit_reason)
# Normal completion — stay quiet. ``text_response(...)`` is the
# healthy terminal; anything that produced a real reply is fine.
if reason.startswith("text_response"):
return ""
prefix = "⚠️ No reply: "
if reason == "empty_response_exhausted":
return (
prefix
+ "the model returned empty content after retries and any "
"fallback providers. Try `continue`, switch model/provider, "
"or inspect the tool output above."
)
if reason == "all_retries_exhausted_no_response":
return (
prefix
+ "all API retries were exhausted before a response was "
"produced (provider errors / rate limits). Try `continue` "
"or switch provider."
)
if reason == "partial_stream_recovery":
return (
prefix
+ "streaming stopped early and only a partial response was "
"recovered. Send `continue` to resume from where it stopped."
)
if reason == "fallback_prior_turn_content":
return (
prefix
+ "no new content was produced this turn; showing recovered "
"prior context. Send `continue` to retry."
)
if reason == "interrupted_during_api_call":
return (
prefix
+ "the request was interrupted mid-call before a reply was "
"received. Send `continue` to retry."
)
if reason == "budget_exhausted":
return (
prefix
+ "the per-turn iteration/cost budget was exhausted before a "
"final answer. Send `continue` to keep going."
)
if reason == "ollama_runtime_context_too_small":
return (
prefix
+ "the local model's context window was too small to finish. "
"Increase the context size or use a larger model."
)
if reason.startswith("max_iterations_reached"):
return (
prefix
+ "the maximum tool-iteration limit was reached before a "
"final answer. Send `continue` to keep going, or raise "
"`max_iterations`."
)
if reason.startswith("error_near_max_iterations"):
return (
prefix
+ "an error occurred near the iteration limit before a final "
"answer. Check the tool output above, then send `continue`."
)
if reason == "pending_tool_result":
return (
prefix
+ "the turn stopped while a tool result was still pending and "
"the model produced no follow-up text. Send `continue` to "
"let it summarize."
)
# Unknown/diagnostic-only reasons (e.g. "unknown", guardrail_halt
# which already surfaces its own message) — don't second-guess.
return ""
def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
"""Forwarder — see ``agent.agent_runtime_helpers.apply_pending_steer_to_tool_results``."""
from agent.agent_runtime_helpers import apply_pending_steer_to_tool_results
@ -3487,6 +3607,18 @@ class AIAgent:
from agent.chat_completion_helpers import try_activate_fallback
return try_activate_fallback(self, reason)
def _has_pending_fallback(self) -> bool:
"""Whether a fallback provider is actually available to switch to.
Used to gate user-facing "trying fallback..." status so we don't
announce a fallback that will never be attempted (the user has no
fallback chain configured). Mirrors the early-return guard in
``try_activate_fallback`` (#35314, #17446).
"""
chain = getattr(self, "_fallback_chain", None) or []
index = getattr(self, "_fallback_index", 0)
return index < len(chain)
# ── Per-turn primary restoration ─────────────────────────────────────
def _restore_primary_runtime(self) -> bool:

View file

@ -540,6 +540,7 @@ check_python() {
if PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null)"; then
PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
log_success "Python found: $PYTHON_FOUND_VERSION"
ensure_fts5
return 0
fi
@ -549,6 +550,7 @@ check_python() {
PYTHON_PATH="$("$UV_CMD" python find "$PYTHON_VERSION")"
PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null)"
log_success "Python installed: $PYTHON_FOUND_VERSION"
ensure_fts5
else
log_error "Failed to install Python $PYTHON_VERSION"
log_info "Install Python $PYTHON_VERSION manually, then re-run this script"
@ -556,6 +558,51 @@ check_python() {
fi
}
# Probe whether $1 (a python executable) links a SQLite with the FTS5
# module compiled in. Hermes' session store (hermes_state.py) creates FTS5
# virtual tables for full-text session search; a SQLite without FTS5 makes
# the bundled-python path unusable for that feature. Returns 0 if FTS5 works.
_python_has_fts5() {
    # $1: python executable to probe. The probe script is fed on stdin and
    # exits 1 when an FTS5 virtual table cannot be created; that exit status
    # becomes this function's return value. Probe stderr is discarded.
    "$1" - 2>/dev/null <<'PY'
import sqlite3, sys
try:
    sqlite3.connect(":memory:").execute("CREATE VIRTUAL TABLE t USING fts5(x)")
except Exception:
    sys.exit(1)
PY
}
# Guarantee the resolved uv-managed interpreter ships FTS5. uv's Python
# distributions only gained FTS5 in mid-2025 (python-build-standalone #694),
# so a stale interpreter already in uv's store — which `uv python find`
# happily reuses — can lack it. When that happens, force a reinstall of the
# latest patch for $PYTHON_VERSION (which has FTS5) and re-resolve. This keeps
# the supported install path's session search working without bundling a
# second SQLite or asking the user to do anything.
ensure_fts5() {
    # Nothing to verify until an interpreter has been resolved.
    [ -n "${PYTHON_PATH:-}" ] || return 0

    # Fast path: the resolved interpreter already has FTS5.
    if _python_has_fts5 "$PYTHON_PATH"; then
        return 0
    fi

    log_warn "Resolved Python's SQLite lacks the FTS5 module (session search needs it)."
    log_info "Reinstalling a current Python $PYTHON_VERSION with FTS5 via uv..."
    if "$UV_CMD" python install "$PYTHON_VERSION" --reinstall >/dev/null 2>&1; then
        # Re-resolve into a scratch variable first. If `uv python find` fails
        # here we must keep the interpreter we already had: blanking
        # PYTHON_PATH would break the rest of the install, which is supposed
        # to continue with degraded search. `|| true` keeps a failed lookup
        # from aborting the script under `set -e`.
        _fts5_new_python="$("$UV_CMD" python find "$PYTHON_VERSION" 2>/dev/null || true)"
        if [ -n "$_fts5_new_python" ]; then
            PYTHON_PATH="$_fts5_new_python"
            PYTHON_FOUND_VERSION="$("$PYTHON_PATH" --version 2>/dev/null || true)"
        fi
    fi

    if [ -n "${PYTHON_PATH:-}" ] && _python_has_fts5 "$PYTHON_PATH"; then
        log_success "FTS5 available ($PYTHON_FOUND_VERSION)"
    else
        # Could not obtain an FTS5-capable interpreter (offline, pinned env,
        # etc.). Install proceeds — Hermes degrades gracefully and disables
        # only full-text session search — but warn so it isn't a silent gap.
        log_warn "Could not obtain an FTS5-capable Python. Hermes will run, but"
        log_warn "full-text session search will be disabled until FTS5 is present."
    fi
}
check_git() {
log_info "Checking Git..."

View file

@ -45,6 +45,10 @@ ACP_REGISTRY_MANIFEST = REPO_ROOT / "acp_registry" / "agent.json"
# Auto-extracted from noreply emails + manual overrides
AUTHOR_MAP = {
"zhipengli@thebrainly.ai": "a1245582339",
"mathijs.vd.hurk@gmail.com": "mathijsvandenhurk",
"drpelagik@gmail.com": "SeaXen",
"lengr@users.noreply.github.com": "LengR",
"metalclaudbot@gmail.com": "HashClawAI",
"tonybear55665566@gmail.com": "TonyPepeBear",
"kaspersniels@gmail.com": "nielskaspers",
@ -67,6 +71,7 @@ AUTHOR_MAP = {
"wangpuv@hotmail.com": "wangpuv",
"202622897+ticketclosed-wontfix@users.noreply.github.com": "ticketclosed-wontfix",
"wuxuebin1993@gmail.com": "victorGPT",
"wei.chen.coder@gmail.com": "wenchengxucool",
"frowte3k@gmail.com": "Frowtek",
"211828103+julio-cloudvisor@users.noreply.github.com": "julio-cloudvisor",
"17778+kweiner@users.noreply.github.com": "kweiner",
@ -220,6 +225,7 @@ AUTHOR_MAP = {
"264291321+v1b3coder@users.noreply.github.com": "v1b3coder",
"silverchris@foxmail.com": "ming1523",
"maksesipov@gmail.com": "Qwinty",
"byquenox@gmail.com": "Que0x",
"denisamania@gmail.com": "CalmProton",
"308068+mbac@users.noreply.github.com": "mbac",
"nicoechaniz@altermundi.net": "nicoechaniz",
@ -649,8 +655,10 @@ AUTHOR_MAP = {
"alexazzjjtt@163.com": "alexzhu0",
"pub_forgreatagent@antgroup.com": "AntAISecurityLab",
"252620095+briandevans@users.noreply.github.com": "briandevans",
"incharge.automation@gmail.com": "inchargeautomation-lab",
"danielrpike9@gmail.com": "Bartok9",
"96944678+ymylive@users.noreply.github.com": "sweetcornna",
"laflamme@illinoisalumni.org": "briancl2",
"skozyuk@cruxexperts.com": "CruxExperts",
"154585401+LeonSGP43@users.noreply.github.com": "LeonSGP43",
"12250313+Kailigithub@users.noreply.github.com": "Kailigithub",
@ -1203,6 +1211,7 @@ AUTHOR_MAP = {
"86501179+1RB@users.noreply.github.com": "1RB", # PR #25462 salvage (discord forwarded messages)
"44045943+ayushere@users.noreply.github.com": "ayushere", # PR #25342 salvage (memory teardown leak)
"15791290+domtriola@users.noreply.github.com": "domtriola", # PR #25424 salvage (docs tirith link)
"tuancookiez@gmail.com": "tuancookiez-hub", # PR #34865 salvage (LSP Windows .cmd shim spawn, #34864)
"284216128+ephron-ren@users.noreply.github.com": "ephron-ren", # PR #25358 salvage (MiMo reasoning echo-back)
"96843562+freqyfreqy@users.noreply.github.com": "freqyfreqy", # PR #25423 salvage (docs LSP worktree -> repo)
"54306477+fu576@users.noreply.github.com": "fu576", # PR #25369 salvage (api_mode not inherited cross-provider)

View file

@ -129,7 +129,11 @@ def _run_gws(parts: list[str], *, params: dict | None = None, body: dict | None
def _headers_dict(msg: dict) -> dict[str, str]:
return {h["name"]: h["value"] for h in msg.get("payload", {}).get("headers", [])}
return {
h["name"].lower(): h["value"]
for h in msg.get("payload", {}).get("headers", [])
if h.get("name")
}
def _extract_message_body(msg: dict) -> str:
@ -230,10 +234,10 @@ def gmail_search(args):
{
"id": msg["id"],
"threadId": msg["threadId"],
"from": headers.get("From", ""),
"to": headers.get("To", ""),
"subject": headers.get("Subject", ""),
"date": headers.get("Date", ""),
"from": headers.get("from", ""),
"to": headers.get("to", ""),
"subject": headers.get("subject", ""),
"date": headers.get("date", ""),
"snippet": msg.get("snippet", ""),
"labels": msg.get("labelIds", []),
}
@ -260,10 +264,10 @@ def gmail_search(args):
output.append({
"id": msg["id"],
"threadId": msg["threadId"],
"from": headers.get("From", ""),
"to": headers.get("To", ""),
"subject": headers.get("Subject", ""),
"date": headers.get("Date", ""),
"from": headers.get("from", ""),
"to": headers.get("to", ""),
"subject": headers.get("subject", ""),
"date": headers.get("date", ""),
"snippet": msg.get("snippet", ""),
"labels": msg.get("labelIds", []),
})
@ -281,10 +285,10 @@ def gmail_get(args):
result = {
"id": msg["id"],
"threadId": msg["threadId"],
"from": headers.get("From", ""),
"to": headers.get("To", ""),
"subject": headers.get("Subject", ""),
"date": headers.get("Date", ""),
"from": headers.get("from", ""),
"to": headers.get("to", ""),
"subject": headers.get("subject", ""),
"date": headers.get("date", ""),
"labels": msg.get("labelIds", []),
"body": _extract_message_body(msg),
}
@ -300,10 +304,10 @@ def gmail_get(args):
result = {
"id": msg["id"],
"threadId": msg["threadId"],
"from": headers.get("From", ""),
"to": headers.get("To", ""),
"subject": headers.get("Subject", ""),
"date": headers.get("Date", ""),
"from": headers.get("from", ""),
"to": headers.get("to", ""),
"subject": headers.get("subject", ""),
"date": headers.get("date", ""),
"labels": msg.get("labelIds", []),
"body": _extract_message_body(msg),
}
@ -314,12 +318,12 @@ def gmail_get(args):
def gmail_send(args):
if _gws_binary():
message = MIMEText(args.body, "html" if args.html else "plain")
message["to"] = args.to
message["subject"] = args.subject
message["To"] = args.to
message["Subject"] = args.subject
if args.cc:
message["cc"] = args.cc
message["Cc"] = args.cc
if args.from_header:
message["from"] = args.from_header
message["From"] = args.from_header
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
body = {"raw": raw}
@ -336,12 +340,12 @@ def gmail_send(args):
service = build_service("gmail", "v1")
message = MIMEText(args.body, "html" if args.html else "plain")
message["to"] = args.to
message["subject"] = args.subject
message["To"] = args.to
message["Subject"] = args.subject
if args.cc:
message["cc"] = args.cc
message["Cc"] = args.cc
if args.from_header:
message["from"] = args.from_header
message["From"] = args.from_header
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
body = {"raw": raw}
@ -367,18 +371,18 @@ def gmail_reply(args):
)
headers = _headers_dict(original)
subject = headers.get("Subject", "")
subject = headers.get("subject", "")
if not subject.startswith("Re:"):
subject = f"Re: {subject}"
message = MIMEText(args.body)
message["to"] = headers.get("From", "")
message["subject"] = subject
message["To"] = headers.get("from", "")
message["Subject"] = subject
if args.from_header:
message["from"] = args.from_header
if headers.get("Message-ID"):
message["In-Reply-To"] = headers["Message-ID"]
message["References"] = headers["Message-ID"]
message["From"] = args.from_header
if headers.get("message-id"):
message["In-Reply-To"] = headers["message-id"]
message["References"] = headers["message-id"]
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
result = _run_gws(
@ -396,18 +400,18 @@ def gmail_reply(args):
).execute()
headers = _headers_dict(original)
subject = headers.get("Subject", "")
subject = headers.get("subject", "")
if not subject.startswith("Re:"):
subject = f"Re: {subject}"
message = MIMEText(args.body)
message["to"] = headers.get("From", "")
message["subject"] = subject
message["To"] = headers.get("from", "")
message["Subject"] = subject
if args.from_header:
message["from"] = args.from_header
if headers.get("Message-ID"):
message["In-Reply-To"] = headers["Message-ID"]
message["References"] = headers["Message-ID"]
message["From"] = args.from_header
if headers.get("message-id"):
message["In-Reply-To"] = headers["message-id"]
message["References"] = headers["message-id"]
raw = base64.urlsafe_b64encode(message.as_bytes()).decode()
body = {"raw": raw, "threadId": original["threadId"]}

View file

@ -94,6 +94,47 @@ def test_install_npm_works_without_extras(tmp_path, monkeypatch):
assert install_targets == ["pyright"]
def test_existing_binary_finds_windows_wrapper_in_staging(tmp_path, monkeypatch):
    """A ``.cmd`` shim in the Hermes LSP bin dir counts as an installed binary.

    ``shutil.which`` is stubbed to return ``None`` and ``_is_windows`` to
    return ``True``, so the only thing that can satisfy the lookup is the
    wrapper file written below.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from agent.lsp import install as install_mod

    def _pretend_windows():
        return True

    def _which_finds_nothing(_name):
        return None

    shim_path = install_mod.hermes_lsp_bin_dir() / "pyright-langserver.cmd"
    shim_path.write_text("@echo off\n")
    shim_path.chmod(0o755)

    monkeypatch.setattr(install_mod, "_is_windows", _pretend_windows)
    monkeypatch.setattr(install_mod.shutil, "which", _which_finds_nothing)

    found = install_mod._existing_binary("pyright-langserver")
    assert found == str(shim_path)
    assert install_mod.detect_status("pyright") == "installed"
def test_install_pip_finds_windows_scripts_launcher(tmp_path, monkeypatch):
    """``_install_pip`` resolves a launcher that pip dropped into ``Scripts/``.

    ``subprocess.run`` is replaced by a fake that simulates a successful pip
    run by creating ``python-packages/Scripts/fake-language-server.exe`` next
    to the Hermes LSP bin dir.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from agent.lsp import install as install_mod

    exe_name = "fake-language-server.exe"

    def fake_run(cmd, **kwargs):
        # Mimic the on-disk result of a pip install on native Windows.
        packages_root = install_mod.hermes_lsp_bin_dir().parent / "python-packages"
        scripts_dir = packages_root / "Scripts"
        scripts_dir.mkdir(parents=True, exist_ok=True)
        exe_path = scripts_dir / exe_name
        exe_path.write_text("launcher\n")
        exe_path.chmod(0o755)
        return MagicMock(returncode=0, stderr="")

    monkeypatch.setattr(install_mod, "_is_windows", lambda: True)
    monkeypatch.setattr(install_mod.subprocess, "run", fake_run)

    resolved = install_mod._install_pip("fake-lsp", "fake-language-server")

    assert resolved is not None
    assert resolved.endswith(exe_name)
    # The launcher must also be reachable from the LSP bin dir itself.
    staged = install_mod.hermes_lsp_bin_dir() / exe_name
    assert staged.exists()
# ---------------------------------------------------------------------------
# Fix 2: ``hermes lsp status`` surfaces shellcheck-missing for bash
# ---------------------------------------------------------------------------

View file

@ -41,6 +41,8 @@ class TestShouldCompress:
class TestUpdateFromResponse:
def test_updates_fields(self, compressor):
compressor.awaiting_real_usage_after_compression = True
compressor.last_compression_rough_tokens = 90_000
compressor.update_from_response({
"prompt_tokens": 5000,
"completion_tokens": 1000,
@ -48,12 +50,39 @@ class TestUpdateFromResponse:
})
assert compressor.last_prompt_tokens == 5000
assert compressor.last_completion_tokens == 1000
assert compressor.last_real_prompt_tokens == 5000
assert compressor.last_rough_tokens_when_real_prompt_fit == 90_000
assert compressor.awaiting_real_usage_after_compression is False
def test_missing_fields_default_zero(self, compressor):
compressor.update_from_response({})
assert compressor.last_prompt_tokens == 0
class TestPreflightDeferral:
    """Pin when ``should_defer_preflight_to_real_usage`` defers or refuses."""

    @staticmethod
    def _prime(compressor, real_prompt_tokens):
        """Set the shared baseline: 85k threshold, 90k rough tokens at last fit."""
        compressor.threshold_tokens = 85_000
        compressor.last_real_prompt_tokens = real_prompt_tokens
        compressor.last_rough_tokens_when_real_prompt_fit = 90_000

    def test_defers_when_recent_real_usage_fit_and_rough_growth_is_small(self, compressor):
        self._prime(compressor, 50_000)

        deferred = compressor.should_defer_preflight_to_real_usage(93_000)

        assert deferred is True
        # Deferring also moves the rough-token baseline to the new estimate.
        assert compressor.last_rough_tokens_when_real_prompt_fit == 93_000

    def test_does_not_defer_when_rough_growth_is_large(self, compressor):
        self._prime(compressor, 50_000)

        deferred = compressor.should_defer_preflight_to_real_usage(100_000)

        assert deferred is False

    def test_does_not_defer_without_recent_real_usage(self, compressor):
        self._prime(compressor, 0)

        deferred = compressor.should_defer_preflight_to_real_usage(93_000)

        assert deferred is False
class TestCompress:
def _make_messages(self, n):

View file

@ -123,55 +123,6 @@ class TestEstimateMessagesTokensRough:
# =========================================================================
class TestDefaultContextLengths:
def test_claude_models_context_lengths(self):
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
if "claude" not in key:
continue
# Claude 4.6+ models (4.6, 4.7, 4.8) have 1M context at standard
# API pricing (no long-context premium). Older Claude 4.x and
# 3.x models cap at 200k.
if any(tag in key for tag in ("4.6", "4-6", "4.7", "4-7", "4.8", "4-8")):
assert value == 1000000, f"{key} should be 1000000"
else:
assert value == 200000, f"{key} should be 200000"
def test_gpt4_models_128k_or_1m(self):
# gpt-4.1 and gpt-4.1-mini have 1M context; other gpt-4* have 128k
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
if "gpt-4" in key and "gpt-4.1" not in key:
assert value == 128000, f"{key} should be 128000"
def test_gpt41_models_1m(self):
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
if "gpt-4.1" in key:
assert value == 1047576, f"{key} should be 1047576"
def test_gemini_models_1m(self):
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
if "gemini" in key:
assert value == 1048576, f"{key} should be 1048576"
def test_grok_models_context_lengths(self):
# xAI /v1/models does not return context_length metadata, so
# DEFAULT_CONTEXT_LENGTHS must cover the Grok family explicitly.
# Values sourced from models.dev (2026-04).
expected = {
"grok-4.20": 2000000,
"grok-4-fast": 2000000,
"grok-4": 256000,
"grok-build": 256000,
"grok-code-fast": 256000,
"grok-3": 131072,
"grok-2": 131072,
"grok-2-vision": 8192,
"grok": 131072,
}
for key, value in expected.items():
assert key in DEFAULT_CONTEXT_LENGTHS, f"{key} missing from DEFAULT_CONTEXT_LENGTHS"
assert DEFAULT_CONTEXT_LENGTHS[key] == value, (
f"{key} should be {value}, got {DEFAULT_CONTEXT_LENGTHS[key]}"
)
def test_grok_substring_matching(self):
# Longest-first substring matching must resolve the real xAI model
# IDs to the correct fallback entries without 128k probe-down.
@ -268,13 +219,6 @@ class TestDefaultContextLengths:
f"{model_id}: expected {expected_ctx}, got {actual}"
)
def test_all_values_positive(self):
for key, value in DEFAULT_CONTEXT_LENGTHS.items():
assert value > 0, f"{key} has non-positive context length"
def test_dict_is_not_empty(self):
assert len(DEFAULT_CONTEXT_LENGTHS) >= 10
# =========================================================================
# Codex OAuth context-window resolution (provider="openai-codex")
@ -1141,12 +1085,6 @@ class TestContextProbeTiers:
for i in range(len(CONTEXT_PROBE_TIERS) - 1):
assert CONTEXT_PROBE_TIERS[i] > CONTEXT_PROBE_TIERS[i + 1]
def test_first_tier_is_256k(self):
assert CONTEXT_PROBE_TIERS[0] == 256_000
def test_last_tier_is_8k(self):
assert CONTEXT_PROBE_TIERS[-1] == 8_000
class TestGetNextProbeTier:
def test_from_256k(self):

View file

@ -82,17 +82,6 @@ SAMPLE_REGISTRY = {
class TestProviderMapping:
def test_all_mapped_providers_are_strings(self):
for hermes_id, mdev_id in PROVIDER_TO_MODELS_DEV.items():
assert isinstance(hermes_id, str)
assert isinstance(mdev_id, str)
def test_known_providers_mapped(self):
assert PROVIDER_TO_MODELS_DEV["anthropic"] == "anthropic"
assert PROVIDER_TO_MODELS_DEV["copilot"] == "github-copilot"
assert PROVIDER_TO_MODELS_DEV["stepfun"] == "stepfun"
assert PROVIDER_TO_MODELS_DEV["kilocode"] == "kilo"
def test_xai_oauth_uses_xai_catalog(self):
assert PROVIDER_TO_MODELS_DEV["xai"] == "xai"
assert PROVIDER_TO_MODELS_DEV["xai-oauth"] == "xai"

View file

@ -0,0 +1,141 @@
"""Regression coverage for #35344: a resumed session must not let a stale
``## Active Task`` from an inherited compaction handoff hijack the reply to a
new, unrelated user message.
The failure mode (real report): a lineage was compacted, producing a handoff
whose ``## Active Task`` described task A. The lineage was resumed later and
the user asked about an unrelated task B. The model answered with A because
the handoff's resume directive outranked the fresh ask.
The structural fix lives in ``SUMMARY_PREFIX``: the handoff is framed as
reference-only and the latest user message explicitly *wins* on conflict, with
named reverse-signal verbs. Two invariants guard the resume path specifically:
1. A handoff persisted under the OLD (conflicting) prefix is re-normalized to
the CURRENT prefix when it is re-compacted on a resumed lineage, so that a
pre-fix stale handoff cannot keep its "resume exactly" directive forever.
2. The current handoff prefix contains an unambiguous "latest message wins /
discard stale Active Task" rule, so an unrelated new ask is privileged over
the inherited ``## Active Task``.
These are content/structural assertions (no live model call): they pin the
mechanism that makes the stale task historical rather than active.
"""
from agent.context_compressor import (
SUMMARY_PREFIX,
LEGACY_SUMMARY_PREFIX,
ContextCompressor,
)
# The conflicting prefix that shipped before the #35344 fix. A handoff
# persisted in a resumed lineage could carry this verbatim.
# It combines a "background reference, NOT active instructions" framing with
# a "resume exactly" directive; the tests in this file check that the
# directive does not survive re-normalization or detection.
# NOTE: this is a frozen fixture of the old text — do not reword it.
_OLD_CONFLICTING_PREFIX = (
"[CONTEXT COMPACTION — REFERENCE ONLY] Earlier turns were compacted "
"into the summary below. This is a handoff from a previous context "
"window — treat it as background reference, NOT as active instructions. "
"Do NOT answer questions or fulfill requests mentioned in this summary; "
"they were already addressed. "
"Your current task is identified in the '## Active Task' section of the "
"summary — resume exactly from there. "
"Respond ONLY to the latest user message "
"that appears AFTER this summary. The current session state (files, "
"config, etc.) may reflect work described here — avoid repeating it:"
)
def test_latest_message_wins_over_inherited_active_task():
    """Core #35344 contract: the current prefix ranks the latest user message
    above a stale ``## Active Task``.

    Checked on the lower-cased prefix text only; no model is involved.
    """
    prefix_text = SUMMARY_PREFIX.lower()

    for required_phrase in ("latest user message", "## active task"):
        assert required_phrase in prefix_text

    # Conflict-resolution must be explicit, not implied.
    assert any(verb in prefix_text for verb in ("wins", "supersede"))
    assert "discard" in prefix_text
def test_no_resume_exactly_directive_can_hijack():
    """The current prefix must no longer contain the "resume exactly"
    wording that let a stale Active Task take over the reply."""
    prefix_text = SUMMARY_PREFIX.lower()
    banned_directive = "resume exactly"
    assert banned_directive not in prefix_text
def test_resumed_stale_handoff_gets_renormalized_to_current_prefix():
    """``_with_summary_prefix`` upgrades an old-prefix handoff to the current one.

    The input is a summary body carrying ``_OLD_CONFLICTING_PREFIX``. The
    result must keep the body, start with ``SUMMARY_PREFIX``, and no longer
    contain the "resume exactly" directive.
    """
    body_lines = [
        "## Active Task",
        "User asked: 'Migrate the billing module to Stripe'",
        "",
        "## Goal",
        "Migrate billing.",
        "",
    ]
    stale_body = "\n".join(body_lines)
    stale_handoff = _OLD_CONFLICTING_PREFIX + "\n" + stale_body

    # Sanity: the fixture really does carry the old directive.
    assert "resume exactly" in stale_handoff.lower()

    upgraded = ContextCompressor._with_summary_prefix(stale_handoff)
    upgraded_lower = upgraded.lower()

    # The body is preserved...
    assert "Migrate the billing module to Stripe" in upgraded
    # ...but the conflicting directive is stripped and replaced with the
    # current latest-message-wins framing.
    assert "resume exactly" not in upgraded_lower
    assert upgraded.startswith(SUMMARY_PREFIX)
    assert "wins" in upgraded_lower
def test_legacy_prefix_handoff_also_renormalized():
    """A handoff carrying ``LEGACY_SUMMARY_PREFIX`` is upgraded as well:
    the legacy marker disappears, the current prefix leads, and the body
    text is kept."""
    legacy_handoff = LEGACY_SUMMARY_PREFIX + " ## Active Task\nUser asked: 'task A'"

    upgraded = ContextCompressor._with_summary_prefix(legacy_handoff)

    assert upgraded.startswith(SUMMARY_PREFIX)
    assert LEGACY_SUMMARY_PREFIX not in upgraded
    assert "task A" in upgraded
def test_inherited_handoff_detected_in_resumed_protected_head():
    """``_find_latest_context_summary`` finds a handoff sitting directly after
    the system prompt.

    The transcript models a resumed lineage: system prompt, a current-prefix
    handoff for task A, an assistant ack, then an unrelated user ask. The
    search covers indices 1..len(messages) and must report index 1 with a body
    that no longer starts with the prefix.
    """
    handoff_text = f"{SUMMARY_PREFIX}\n## Active Task\nUser asked: 'task A'"
    transcript = [
        ("system", "system prompt"),
        ("user", handoff_text),
        ("assistant", "ok"),
        ("user", "Unrelated task B: what's the capital of France?"),
    ]
    messages = [{"role": role, "content": content} for role, content in transcript]

    # Search the whole post-system range.
    found_idx, found_body = ContextCompressor._find_latest_context_summary(
        messages, 1, len(messages)
    )

    assert found_idx == 1, "handoff in protected head must be found"
    assert "task A" in found_body
    # The detected body is stripped of the prefix (treated as state, not a
    # standalone instruction message).
    assert not found_body.startswith(SUMMARY_PREFIX)
def test_historical_prefixed_handoff_detected_and_stripped():
    """A handoff carrying ``_OLD_CONFLICTING_PREFIX`` is still detected as a
    context summary, and the body returned by detection no longer contains
    the old "resume exactly" directive."""
    old_handoff = f"{_OLD_CONFLICTING_PREFIX}\n## Active Task\nUser asked: 'task A'"
    transcript = [
        ("system", "system prompt"),
        ("user", old_handoff),
        ("assistant", "ok"),
        ("user", "Unrelated task B"),
    ]
    messages = [{"role": role, "content": content} for role, content in transcript]

    found_idx, found_body = ContextCompressor._find_latest_context_summary(
        messages, 1, len(messages)
    )

    assert found_idx == 1
    assert "task A" in found_body
    assert "resume exactly" not in found_body.lower()

View file

@ -0,0 +1,226 @@
"""Regression test: set_runtime_main() must pass base_url/api_key/api_mode
so that _resolve_auto() can route custom: providers in Step 1.
Fixes https://github.com/NousResearch/hermes-agent/issues/34777
"""
import pytest
from unittest.mock import patch, MagicMock
def _get_globals(mod):
"""Read runtime globals without triggering redaction."""
return {
"provider": mod._RUNTIME_MAIN_PROVIDER,
"model": mod._RUNTIME_MAIN_MODEL,
"base_url": mod._RUNTIME_MAIN_BASE_URL,
"cred": mod._RUNTIME_MAIN_API_KEY, # renamed to avoid redaction
"api_mode": mod._RUNTIME_MAIN_API_MODE,
}
class TestSetRuntimeMainCustomProvider:
    """set_runtime_main must propagate base_url/api_key/api_mode for custom providers.

    Every test clears the process-local runtime globals before and after it
    runs, so state cannot leak between tests.
    """

    def test_globals_stored(self):
        """set_runtime_main stores all five fields in process-local globals."""
        import agent.auxiliary_client as mod

        mod.clear_runtime_main()
        try:
            mod.set_runtime_main(
                "custom:my-router",
                "glm-5.1",
                base_url="https://my-server.example.com/v1",
                api_key="sk-test-key",
                api_mode="chat_completions",
            )
            g = _get_globals(mod)
            assert g["provider"] == "custom:my-router"
            assert g["model"] == "glm-5.1"
            assert g["base_url"] == "https://my-server.example.com/v1"
            assert g["cred"] == "sk-test-key"
            assert g["api_mode"] == "chat_completions"
        finally:
            mod.clear_runtime_main()

    def test_clear_resets_all_globals(self):
        """clear_runtime_main resets all five globals to empty."""
        import agent.auxiliary_client as mod

        mod.set_runtime_main(
            "custom:x",
            "m",
            base_url="https://x.example.com",
            api_key="sk-abc",
            api_mode="chat_completions",
        )
        mod.clear_runtime_main()
        g = _get_globals(mod)
        for v in g.values():
            assert v == "", f"Expected empty, got {v!r}"

    def test_resolve_auto_uses_globals_for_custom_provider(self):
        """_resolve_auto reads base_url/api_key from globals when main_runtime is None."""
        import agent.auxiliary_client as mod

        mod.clear_runtime_main()
        try:
            mod.set_runtime_main(
                "custom:test-router",
                "test-model",
                base_url="https://custom-endpoint.example.com/v1",
                api_key="sk-test-123",
            )
            with patch.object(mod, "resolve_provider_client") as mock_resolve:
                mock_resolve.return_value = (MagicMock(), "test-model")
                mod._resolve_auto(main_runtime=None)

                mock_resolve.assert_called_once()
                positional, keyword = mock_resolve.call_args
                assert positional[0] == "custom"
                assert keyword["explicit_base_url"] == "https://custom-endpoint.example.com/v1"
                assert keyword["explicit_api_key"] == "sk-test-123"
        finally:
            mod.clear_runtime_main()

    def test_explicit_main_runtime_takes_precedence(self):
        """When main_runtime dict has values, globals are NOT used."""
        import agent.auxiliary_client as mod

        mod.clear_runtime_main()
        try:
            mod.set_runtime_main(
                "custom:router-a",
                "model-a",
                base_url="https://from-global.example.com",
                api_key="sk-global",
            )
            with patch.object(mod, "resolve_provider_client") as mock_resolve:
                mock_resolve.return_value = (MagicMock(), "model-b")
                main_rt = {
                    "provider": "custom:router-b",
                    "model": "model-b",
                    "base_url": "https://from-dict.example.com",
                    "api_key": "sk-dict",
                }
                mod._resolve_auto(main_runtime=main_rt)

                # Guard before indexing: call_args is None when the mock was
                # never invoked, and subscripting None would raise a TypeError
                # that hides the real failure.
                assert mock_resolve.called, (
                    "_resolve_auto never called resolve_provider_client"
                )
                keyword = mock_resolve.call_args[1]
                assert keyword["explicit_base_url"] == "https://from-dict.example.com"
                assert keyword["explicit_api_key"] == "sk-dict"
        finally:
            mod.clear_runtime_main()

    def test_backward_compatible_defaults(self):
        """Calling set_runtime_main with only positional args still works."""
        import agent.auxiliary_client as mod

        mod.clear_runtime_main()
        try:
            mod.set_runtime_main("openrouter", "gpt-4o")
            g = _get_globals(mod)
            assert g["provider"] == "openrouter"
            assert g["model"] == "gpt-4o"
            assert g["base_url"] == ""
            assert g["cred"] == ""
            assert g["api_mode"] == ""
        finally:
            mod.clear_runtime_main()
class TestResolveAutoCustomEndToEnd:
    """End-to-end routing assertions — build a *real* client (no mock on
    resolve_provider_client) and verify the auxiliary auto-detect chain lands
    on the user's custom endpoint instead of falling through to the aggregator
    chain. These guard the actual user-visible symptom in #34777 (aux tasks
    silently routed to a fallback provider) rather than just the wiring.
    """

    # Environment variables removed before each test so that no aggregator
    # credentials or stale OPENAI_BASE_URL can influence resolution.
    _SCRUBBED_ENV = (
        "OPENROUTER_API_KEY",
        "NOUS_API_KEY",
        "OPENAI_API_KEY",
        "OPENAI_BASE_URL",
    )

    @staticmethod
    def _client_base_url(client):
        """Return ``client.base_url`` or ``client._client.base_url`` as a string.

        The two attribute chains are tried in that order; ``None`` is returned
        when neither exists on *client*.
        """
        for chain in (("base_url",), ("_client", "base_url")):
            obj = client
            try:
                for attr in chain:
                    obj = getattr(obj, attr)
                return str(obj)
            except AttributeError:
                continue
        return None

    def _prepare_home(self, tmp_path, monkeypatch, config_text):
        """Scrub provider env vars and point HERMES_HOME at a fresh config."""
        for var in self._SCRUBBED_ENV:
            monkeypatch.delenv(var, raising=False)
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        (hermes_home / "config.yaml").write_text(config_text)
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    def test_config_less_custom_endpoint_routes_via_global(self, tmp_path, monkeypatch):
        """custom:<name> with NO config entry: the live base_url carried by
        set_runtime_main() must build a real client at that endpoint, not
        fall through to Step 2 (the regression in #34777)."""
        import agent.auxiliary_client as mod

        self._prepare_home(
            tmp_path,
            monkeypatch,
            "model:\n"
            "  default: glm-5.1\n"
            "  provider: 'custom:ephemeral'\n"
            "  base_url: ''\n",
        )

        mod.clear_runtime_main()
        try:
            mod.set_runtime_main(
                "custom:ephemeral",
                "glm-5.1",
                base_url="https://ephemeral.live/v1",
                api_key="sk-live",
            )
            client, resolved = mod.resolve_provider_client("auto", None)
            assert client is not None, (
                "config-less custom endpoint fell through to Step 2 — "
                "the #34777 bug is back"
            )
            assert resolved == "glm-5.1"
            base = self._client_base_url(client)
            assert base and base.rstrip("/") == "https://ephemeral.live/v1"
        finally:
            mod.clear_runtime_main()

    def test_named_custom_with_config_entry_still_routes(self, tmp_path, monkeypatch):
        """Regression guard: custom:<name> WITH a custom_providers entry must
        still resolve to that entry's endpoint. An earlier competing fix
        collapsed the provider to bare ``custom`` before resolution, which
        broke the named-custom branch and returned None here."""
        import agent.auxiliary_client as mod

        # The list item's keys must be indented under "- name" so that they
        # all belong to the same custom_providers entry.
        self._prepare_home(
            tmp_path,
            monkeypatch,
            "model:\n"
            "  default: glm-5.1\n"
            "  provider: 'custom:openclaw'\n"
            "  base_url: ''\n"
            "custom_providers:\n"
            "  - name: openclaw\n"
            "    base_url: 'https://withcfg.example/v1'\n"
            "    model: glm-5.1\n"
            "    api_key: cfg-key\n",
        )

        # No live base_url carried — resolution must come from config alone,
        # via the named-custom branch in resolve_provider_client.
        mod.clear_runtime_main()
        try:
            mod.set_runtime_main("custom:openclaw", "glm-5.1")
            client, _resolved = mod.resolve_provider_client("auto", None)
            assert client is not None
            base = self._client_base_url(client)
            assert base and base.rstrip("/") == "https://withcfg.example/v1"
        finally:
            mod.clear_runtime_main()

View file

@ -0,0 +1,62 @@
"""Pin the semantics of SUMMARY_PREFIX so the compaction handoff doesn't
re-introduce conflicting instructions.
Background: SUMMARY_PREFIX previously contained two contradictory directives:
1. "treat it as background reference, NOT as active instructions"
"Do NOT answer questions or fulfill requests mentioned in this summary"
"Respond ONLY to the latest user message that appears AFTER this summary"
2. "Your current task is identified in the '## Active Task' section of the
summary — resume exactly from there."
When the latest user message contradicted Active Task (e.g. "stop the
i18n refactor", "never mind, look at grafana"), the model often followed
(2) anyway because "resume exactly" is a strong directive, leading to
the agent repeatedly re-surfacing already-cancelled work across turns.
These tests pin the post-fix invariants so the conflict cannot regress.
"""
from agent.context_compressor import SUMMARY_PREFIX
def test_no_resume_exactly_directive():
    """``SUMMARY_PREFIX`` must not contain the phrase "resume exactly"
    (checked case-insensitively)."""
    prefix_text = SUMMARY_PREFIX.lower()
    forbidden = "resume exactly"
    assert forbidden not in prefix_text
def test_latest_message_wins_on_conflict():
    """``SUMMARY_PREFIX`` names the latest user message and contains at least
    one explicit conflict-resolution word."""
    prefix_text = SUMMARY_PREFIX.lower()
    assert "latest user message" in prefix_text
    # Must have an explicit conflict-resolution rule.
    resolution_words = ("wins", "supersede", "discard")
    assert any(word in prefix_text for word in resolution_words)
def test_reverse_signals_called_out():
    """At least three of the canonical reverse-signal phrases (stop, undo,
    roll back, never mind, just verify) must appear in ``SUMMARY_PREFIX``."""
    prefix_text = SUMMARY_PREFIX.lower()
    # At least a few of the canonical reverse-signal verbs should appear.
    reverse_terms = ["stop", "undo", "roll back", "never mind", "just verify"]
    present = [term for term in reverse_terms if term in prefix_text]
    hits = len(present)
    assert hits >= 3, (
        f"Expected ≥3 reverse-signal terms in SUMMARY_PREFIX, found {hits}. "
        "Without naming them the model treats reverse signals as ordinary "
        "context and keeps pushing the cancelled task."
    )
def test_summary_marked_reference_only():
    """The reference-only framing phrases must all remain in the prefix
    (case-sensitive match)."""
    framing_phrases = (
        "REFERENCE ONLY",
        "background reference",
        "NOT as active instructions",
    )
    for phrase in framing_phrases:
        assert phrase in SUMMARY_PREFIX
def test_memory_authority_preserved():
    """The prefix must still mention MEMORY.md, USER.md and the word
    "authoritative" (case-sensitive match)."""
    authority_markers = ("MEMORY.md", "USER.md", "authoritative")
    for marker in authority_markers:
        assert marker in SUMMARY_PREFIX

View file

@ -75,6 +75,27 @@ class TestLightModeDetection:
assert cli_mod._detect_light_mode() is True
class TestOsc11Probe:
    """The OSC 11 background probe must never run where its reply can leak
    into prompt_toolkit's input (a late BEL-terminated reply reads as Ctrl+G
    = open-editor, trapping the user in a stray editor). Guard the cases we
    refuse to probe in.
    """

    # Environment variables the SSH tests toggle. They are cleared before
    # each scenario so the host environment cannot decide the outcome.
    _SSH_VARS = ("SSH_CONNECTION", "SSH_CLIENT", "SSH_TTY")

    @pytest.mark.parametrize("var", _SSH_VARS)
    def test_skips_over_ssh(self, cli_mod, monkeypatch, var):
        """With a tty on both ends, any single SSH_* variable makes the probe return None."""
        monkeypatch.setattr(cli_mod.sys.stdin, "isatty", lambda: True, raising=False)
        monkeypatch.setattr(cli_mod.sys.stdout, "isatty", lambda: True, raising=False)
        for v in self._SSH_VARS:
            monkeypatch.delenv(v, raising=False)
        monkeypatch.setenv(var, "1.2.3.4 5555 22")
        assert cli_mod._query_osc11_background() is None

    def test_skips_when_not_a_tty(self, cli_mod, monkeypatch):
        """A non-tty stdin makes the probe return None."""
        # Clear SSH markers first: otherwise, when the suite itself runs over
        # SSH, the probe could return None because of the SSH case and this
        # test would pass without the non-tty case ever being exercised.
        for v in self._SSH_VARS:
            monkeypatch.delenv(v, raising=False)
        monkeypatch.setattr(cli_mod.sys.stdin, "isatty", lambda: False, raising=False)
        assert cli_mod._query_osc11_background() is None
class TestLightModeRemap:
def test_remap_no_op_in_dark_mode(self, cli_mod, monkeypatch):
monkeypatch.setenv("HERMES_LIGHT", "0")
@ -133,7 +154,9 @@ class TestSkinConfigHook:
after = SkinConfig.get_color
assert before is after
def test_skin_color_remaps_through_wrapper_in_light_mode(self, cli_mod, monkeypatch):
def test_skin_color_remaps_through_wrapper_in_light_mode(
self, cli_mod, monkeypatch
):
from hermes_cli.skin_engine import SkinConfig
cli_mod._LIGHT_MODE_CACHE = True

View file

@ -0,0 +1,116 @@
"""Regression guard for issue #34569 — inline /steer (and /model) submit
must repaint the input area after clearing the buffer.
Mechanism of the bug
--------------------
``handle_enter`` dispatches ``/steer`` (and ``/model``) inline on the UI
thread while the agent is running. Those branches called
``buffer.reset(append_to_history=True)`` but, unlike every *other*
early-return branch in the handler, did NOT call ``event.app.invalidate()``.
Because ``process_command()`` prints through ``patch_stdout`` (which scrolls
output above the prompt and never triggers a prompt_toolkit redraw), the
just-cleared input area could keep showing the submitted ``/steer <text>``
until some unrelated redraw fired. The user saw their submitted text as if
it were unsent and could accidentally re-submit it.
This test pins the contract structurally: inside ``handle_enter``, any
inline-command early-return that resets the buffer must be followed by an
``event.app.invalidate()`` before its ``return``. It is an *invariant*
(every reset-then-return repaints), not a snapshot of current source.
"""
from __future__ import annotations
import ast
from pathlib import Path
def _load_handle_enter_node() -> ast.FunctionDef:
    """Parse ``cli.py`` and return the first ``handle_enter`` function node.

    ``cli.py`` is located two directories above this file's directory. The
    first synchronous ``def handle_enter`` met by ``ast.walk`` is returned;
    the lookup fails with an assertion when none exists.
    """
    cli_path = Path(__file__).resolve().parents[2] / "cli.py"
    source_text = cli_path.read_text(encoding="utf-8")
    module_tree = ast.parse(source_text)

    matches = (
        candidate
        for candidate in ast.walk(module_tree)
        if isinstance(candidate, ast.FunctionDef) and candidate.name == "handle_enter"
    )
    target = next(matches, None)
    assert target is not None, "handle_enter closure not found in cli.py"
    return target
def _is_buffer_reset(node: ast.stmt) -> bool:
"""True if the statement is ``...current_buffer.reset(...)``."""
if not isinstance(node, ast.Expr):
return False
call = node.value
if not isinstance(call, ast.Call):
return False
func = call.func
return isinstance(func, ast.Attribute) and func.attr == "reset"
def _is_invalidate(node: ast.stmt) -> bool:
"""True if the statement is ``event.app.invalidate()``."""
if not isinstance(node, ast.Expr):
return False
call = node.value
if not isinstance(call, ast.Call):
return False
func = call.func
return isinstance(func, ast.Attribute) and func.attr == "invalidate"
def _collect_reset_blocks(func: ast.FunctionDef) -> list[list[ast.stmt]]:
    """Return every statement list under *func* that holds a buffer reset.

    Each node reached by ``ast.walk`` is checked for list-valued ``body``,
    ``orelse`` and ``finalbody`` attributes. A list is kept when one of its
    direct members is a reset statement according to ``_is_buffer_reset``.
    """
    block_attrs = ("body", "orelse", "finalbody")

    def _has_reset(statements: list) -> bool:
        return any(
            isinstance(entry, ast.stmt) and _is_buffer_reset(entry)
            for entry in statements
        )

    return [
        statements
        for walked in ast.walk(func)
        for attr_name in block_attrs
        if isinstance(statements := getattr(walked, attr_name, None), list)
        and _has_reset(statements)
    ]
def test_inline_command_reset_branches_invalidate():
    """In ``handle_enter``, a buffer reset followed by a ``return`` in the same
    statement list must have an ``invalidate()`` call between them
    (issue #34569)."""
    handler = _load_handle_enter_node()
    reset_blocks = _collect_reset_blocks(handler)
    assert reset_blocks, "expected to find buffer.reset() calls in handle_enter"

    offenders = []
    for statements in reset_blocks:
        for position, stmt in enumerate(statements):
            if not _is_buffer_reset(stmt):
                continue
            # Look only at what follows this reset in the same block.
            following = statements[position + 1:]
            return_offset = next(
                (
                    offset
                    for offset, later in enumerate(following)
                    if isinstance(later, ast.Return)
                ),
                None,
            )
            if return_offset is None:
                # reset not directly followed by a return in this block
                # (e.g. the fall-through reset at the end of the handler) —
                # the next user input naturally repaints, so skip.
                continue
            before_return = following[:return_offset]
            if not any(_is_invalidate(later) for later in before_return):
                offenders.append(ast.dump(stmt))

    assert not offenders, (
        "handle_enter has reset-then-return branch(es) that never call "
        "event.app.invalidate() — the input area can keep showing the "
        "submitted text (issue #34569). Offending reset stmts:\n"
        + "\n".join(offenders)
    )
# Allow running this file directly as a quick check without pytest: the test
# raises AssertionError on failure, otherwise "ok" is printed.
if __name__ == "__main__": # pragma: no cover
    test_inline_command_reset_branches_invalidate()
    print("ok")

View file

@ -276,6 +276,111 @@ class TestExtractCacheBustingConfig:
assert out["tools.registry_generation"] == 12345
def test_skips_honcho_config_read_when_provider_is_not_honcho(self, monkeypatch):
    """With ``memory.provider`` set to ``mem0`` the Honcho extractor must not
    be called, and the Honcho keys in the result are ``None``."""
    from gateway.run import GatewayRunner

    honcho_reads = []

    def _boom():
        honcho_reads.append(True)
        raise AssertionError("should not read Honcho config")

    monkeypatch.setattr(GatewayRunner, "_extract_honcho_cache_busting_config", _boom)

    signature = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "mem0"}})

    assert not honcho_reads
    for honcho_key in ("honcho.peer_name", "honcho.user_peer_aliases"):
        assert signature[honcho_key] is None
def test_reads_honcho_config_only_when_provider_is_honcho(self, monkeypatch):
    """With ``memory.provider`` set to ``honcho`` the Honcho extractor is
    called exactly once and its values appear in the result."""
    from gateway.run import GatewayRunner

    honcho_values = {
        "honcho.peer_name": "eri",
        "honcho.ai_peer": "hermes",
        "honcho.pin_peer_name": True,
        "honcho.runtime_peer_prefix": "tg_",
        "honcho.user_peer_aliases": [("123", "eri")],
    }
    invocations = []

    def _fake():
        invocations.append(True)
        return dict(honcho_values)

    monkeypatch.setattr(GatewayRunner, "_extract_honcho_cache_busting_config", _fake)

    signature = GatewayRunner._extract_cache_busting_config({"memory": {"provider": "honcho"}})

    assert invocations == [True]
    assert signature["honcho.peer_name"] == "eri"
    assert signature["honcho.user_peer_aliases"] == [("123", "eri")]
def test_memory_provider_change_busts_signature(self, monkeypatch):
    """Two configs that differ only in ``memory.provider`` must produce
    different cache-busting signatures, each recording its provider under
    the ``memory.provider`` key."""
    from gateway.run import GatewayRunner

    def _no_honcho_identity(cls):
        return cls._empty_honcho_cache_busting_config()

    # Neutralize honcho.json reads so the only varying input is the
    # provider value itself.
    monkeypatch.setattr(
        GatewayRunner,
        "_extract_honcho_cache_busting_config",
        classmethod(_no_honcho_identity),
    )

    signatures = {}
    for provider in ("honcho", "mem0"):
        signatures[provider] = GatewayRunner._extract_cache_busting_config(
            {"memory": {"provider": provider}}
        )

    assert signatures["honcho"]["memory.provider"] == "honcho"
    assert signatures["mem0"]["memory.provider"] == "mem0"
    assert signatures["honcho"] != signatures["mem0"]
def test_honcho_cache_busting_config_memoized_by_mtime(self, monkeypatch, tmp_path):
    """Repeated Honcho extraction for unchanged honcho.json should reuse parse result.

    A fake ``plugins.memory.honcho.client`` module records every call to
    ``HonchoClientConfig.from_global_config``. Two extractions against an
    untouched file must parse once; after the file is rewritten and its
    mtime advanced, a third extraction must parse again.
    """
    import os
    import sys
    from types import SimpleNamespace

    from gateway.run import GatewayRunner

    config_path = tmp_path / "honcho.json"
    config_path.write_text("{}")
    parse_calls = []

    class FakeConfig:
        peer_name = "eri"
        ai_peer = "hermes"
        pin_peer_name = False
        runtime_peer_prefix = "tg_"
        user_peer_aliases = {"123": "eri"}

        @classmethod
        def from_global_config(cls, config_path=None):
            parse_calls.append(config_path)
            return cls()

    fake_client = SimpleNamespace(
        HonchoClientConfig=FakeConfig,
        resolve_config_path=lambda: config_path,
    )
    monkeypatch.setitem(sys.modules, "plugins.memory.honcho.client", fake_client)
    monkeypatch.setattr(GatewayRunner, "_HONCHO_CACHE_BUSTING_MEMO", {})

    first = GatewayRunner._extract_honcho_cache_busting_config()
    second = GatewayRunner._extract_honcho_cache_busting_config()
    assert first == second
    assert first["honcho.user_peer_aliases"] == [("123", "eri")]
    assert parse_calls == [config_path]

    config_path.write_text("{\n \"changed\": true\n}")
    # Two writes in quick succession can share a timestamp on filesystems
    # with coarse mtime granularity. Push mtime forward explicitly so the
    # invalidation below does not depend on timing.
    # NOTE(review): assumes the memo is keyed on the file's mtime, as the
    # test name suggests — confirm against GatewayRunner.
    stat_result = config_path.stat()
    os.utime(
        config_path,
        ns=(stat_result.st_atime_ns, stat_result.st_mtime_ns + 1_000_000_000),
    )

    third = GatewayRunner._extract_honcho_cache_busting_config()
    assert third == first
    assert parse_calls == [config_path, config_path]
def test_full_round_trip_busts_cache_on_real_edit(self):
"""End-to-end: simulate a config edit on main and verify the
extracted cache_keys change produces a new signature."""

View file

@ -343,6 +343,56 @@ class TestLoadGatewayConfig:
# Env value preserved, not clobbered by yaml.
assert os.environ.get("DISCORD_THREAD_REQUIRE_MENTION") == "true"
def test_bridges_discord_allow_from_from_config_yaml(self, tmp_path, monkeypatch):
    """Top-level ``discord.allow_from`` lands in the Discord platform extras
    and is mirrored into ``DISCORD_ALLOWED_USERS`` as a comma-joined list."""
    allowed_ids = ["123456789012345678", "999888777666555444"]

    home = tmp_path / ".hermes"
    home.mkdir()
    yaml_text = (
        'discord:\n'
        ' allow_from:\n'
        ' - "123456789012345678"\n'
        ' - "999888777666555444"\n'
    )
    (home / "config.yaml").write_text(yaml_text, encoding="utf-8")

    monkeypatch.setenv("HERMES_HOME", str(home))
    # Make sure the env var observed below was produced by the loader.
    monkeypatch.delenv("DISCORD_ALLOWED_USERS", raising=False)

    loaded = load_gateway_config()

    assert loaded.platforms[Platform.DISCORD].extra["allow_from"] == allowed_ids
    assert os.environ.get("DISCORD_ALLOWED_USERS") == ",".join(allowed_ids)
def test_bridges_discord_platform_extra_allow_from_to_env(self, tmp_path, monkeypatch):
    """platforms.discord.extra.allow_from should reach DISCORD_ALLOWED_USERS too."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    # Each level must be indented deeper than its parent; with a flat indent
    # ``discord``, ``extra`` and ``allow_from`` would parse as sibling keys and
    # the path named in the docstring would not exist.
    config_path.write_text(
        "platforms:\n"
        "  discord:\n"
        "    extra:\n"
        "      allow_from:\n"
        "        - \"123456789012345678\"\n",
        encoding="utf-8",
    )

    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    # Make sure the env var observed below was produced by the loader.
    monkeypatch.delenv("DISCORD_ALLOWED_USERS", raising=False)

    config = load_gateway_config()

    assert config.platforms[Platform.DISCORD].extra["allow_from"] == [
        "123456789012345678",
    ]
    assert os.environ.get("DISCORD_ALLOWED_USERS") == "123456789012345678"
def test_bridges_quoted_false_platform_enabled_from_config_yaml(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
@ -361,6 +411,69 @@ class TestLoadGatewayConfig:
assert config.platforms[Platform.API_SERVER].enabled is False
assert Platform.API_SERVER not in config.get_connected_platforms()
def test_bridges_nested_gateway_platforms_from_config_yaml(self, tmp_path, monkeypatch):
    """Platform settings nested under ``gateway.platforms`` are loaded:
    enabled flag, token, home channel and extras for Telegram."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    # Indentation encodes the nesting asserted below
    # (gateway -> platforms -> telegram -> home_channel / extra); a flat
    # indent would turn every key into a sibling.
    config_path.write_text(
        "gateway:\n"
        "  platforms:\n"
        "    telegram:\n"
        "      enabled: true\n"
        "      token: nested-token\n"
        "      home_channel:\n"
        "        platform: telegram\n"
        "        chat_id: \"123\"\n"
        "        name: Nested Home\n"
        "      extra:\n"
        "        reply_prefix: nested\n",
        encoding="utf-8",
    )

    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    config = load_gateway_config()

    telegram = config.platforms[Platform.TELEGRAM]
    assert telegram.enabled is True
    assert telegram.token == "nested-token"
    assert telegram.home_channel == HomeChannel(
        platform=Platform.TELEGRAM,
        chat_id="123",
        name="Nested Home",
    )
    assert telegram.extra["reply_prefix"] == "nested"
def test_top_level_platforms_override_nested_gateway_platforms(self, tmp_path, monkeypatch):
    """When both ``gateway.platforms`` and top-level ``platforms`` configure
    Telegram, the top-level values win for enabled, token and extras."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    config_path = hermes_home / "config.yaml"
    # Two separate Telegram sections: one nested under ``gateway`` and one at
    # the top level. The indentation must keep them distinct mappings.
    config_path.write_text(
        "gateway:\n"
        "  platforms:\n"
        "    telegram:\n"
        "      enabled: false\n"
        "      token: nested-token\n"
        "      extra:\n"
        "        reply_prefix: nested\n"
        "platforms:\n"
        "  telegram:\n"
        "    enabled: true\n"
        "    token: top-token\n"
        "    extra:\n"
        "      reply_prefix: top\n",
        encoding="utf-8",
    )

    monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    config = load_gateway_config()

    telegram = config.platforms[Platform.TELEGRAM]
    assert telegram.enabled is True
    assert telegram.token == "top-token"
    assert telegram.extra["reply_prefix"] == "top"
def test_bridges_quoted_false_session_notify_from_config_yaml(self, tmp_path, monkeypatch):
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()

View file

@ -0,0 +1,202 @@
"""Tests for the outbound silence-narration filter (anti-loop control).
See the gateway delivery path: hallucinated "silence" tokens like ``*(silent)*``
are dropped pre-send so bot-to-bot channels can't mirror them into a token-burning
loop that crashes a model with "no content after all retries".
"""
import pytest
from gateway.config import GatewayConfig, Platform
from gateway.delivery import (
DeliveryRouter,
DeliveryTarget,
_is_silence_narration,
)
# --- Truth table -----------------------------------------------------------
# Inputs that must be classified as silence narration.
# The empty string is intentionally NOT listed here: NEGATIVE_CASES asserts
# that "" (and whitespace-only content) is not narration, and a value cannot
# sit in both truth tables without one of the parametrized tests failing.
POSITIVE_CASES = [
    "*(silent)*",
    "*Silence.*",
    "🔇",
    ".",
    "...",
    "(silent)",
    "_silent_",
    "silent",
    " *(silent)* ",
    "`silent`",
    "~silent~",
    "Silence",
    "no response",
    "No Reply.",
]
# Inputs that must NOT be classified as silence narration.
NEGATIVE_CASES = [
    # Real sentences that merely start with or contain a silence word.
    "Silence is golden — here is the plan...",
    "Silent install completed",
    "The deployment ran silently in the background",
    # Short but meaningful replies.
    "ok",
    "👍",
    "Here is the result:\n\n- item one\n- item two",
    "I have nothing to add, but here is why: the build is green.",
    # Word boundary: trailing letters mean it is not a bare token.
    "silently",
    "no responses were collected from the survey",
    # Longer than 64 chars but opening with a silence token: must be kept.
    "silent " + 70 * "x",
    # Empty / whitespace-only content.
    "",
    " ",
]
@pytest.mark.parametrize("content", POSITIVE_CASES)
def test_is_silence_narration_positive(content):
    """Every entry of POSITIVE_CASES is reported as silence narration."""
    verdict = _is_silence_narration(content)
    assert verdict is True
@pytest.mark.parametrize("content", NEGATIVE_CASES)
def test_is_silence_narration_negative(content):
    """No entry of NEGATIVE_CASES is reported as silence narration."""
    verdict = _is_silence_narration(content)
    assert verdict is False
def test_is_silence_narration_none_safe():
    """``None`` content is accepted and classified as not-narration."""
    verdict = _is_silence_narration(None)
    assert verdict is False
def test_length_guard_rejects_long_strings():
    """Dot-only content is narration at 64 chars but not at 65."""
    guard = 64
    over_limit = "." * (guard + 1)
    at_limit = "." * guard
    # One char past the guard: too long to be treated as narration.
    assert _is_silence_narration(over_limit) is False
    assert _is_silence_narration(at_limit) is True
# --- Integration through DeliveryRouter ------------------------------------
class RecordingAdapter:
    """Test double for a platform adapter: records every ``send`` call in
    ``calls`` and always reports success."""

    def __init__(self):
        self.calls = []

    async def send(self, chat_id, content, metadata=None):
        record = {"chat_id": chat_id, "content": content, "metadata": metadata}
        self.calls.append(record)
        return {"success": True}
@pytest.mark.asyncio
async def test_silence_narration_dropped_pre_send(tmp_path, monkeypatch):
    """With default settings a silence token never reaches the adapter and
    the result is flagged as filtered."""
    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)

    recorder = RecordingAdapter()
    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: recorder})
    destination = DeliveryTarget.parse("discord:99887766")

    outcome = await router._deliver_to_platform(destination, "*(silent)*", metadata=None)

    # adapter.send must not have been invoked at all.
    assert recorder.calls == []
    expected = {
        "success": True,
        "filtered": "silence_narration",
        "delivered": False,
    }
    assert outcome == expected
@pytest.mark.asyncio
async def test_real_message_is_delivered(tmp_path, monkeypatch):
    """A genuine sentence that starts with "Silence" is sent unchanged."""
    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)

    recorder = RecordingAdapter()
    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: recorder})
    destination = DeliveryTarget.parse("discord:99887766")
    message = "Silence is golden — here is the plan..."

    outcome = await router._deliver_to_platform(destination, message, metadata=None)

    assert len(recorder.calls) == 1
    assert recorder.calls[0]["content"] == message
    assert outcome == {"success": True}
@pytest.mark.asyncio
async def test_config_opt_out_lets_silence_through(tmp_path, monkeypatch):
    """``filter_silence_narration=False`` in config disables the filter, so
    the silence token is delivered like any other message."""
    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)

    recorder = RecordingAdapter()
    opted_out = GatewayConfig(filter_silence_narration=False)
    router = DeliveryRouter(opted_out, adapters={Platform.DISCORD: recorder})
    destination = DeliveryTarget.parse("discord:99887766")

    outcome = await router._deliver_to_platform(destination, "*(silent)*", metadata=None)

    assert len(recorder.calls) == 1
    assert recorder.calls[0]["content"] == "*(silent)*"
    assert outcome == {"success": True}
@pytest.mark.asyncio
async def test_env_override_disables_filter(tmp_path, monkeypatch):
    """``HERMES_FILTER_SILENCE_NARRATION=0`` turns the filter off even though
    the config default leaves it on."""
    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_FILTER_SILENCE_NARRATION", "0")

    recorder = RecordingAdapter()
    # Default config (filter on); the env override is expected to win.
    router = DeliveryRouter(GatewayConfig(), adapters={Platform.DISCORD: recorder})
    destination = DeliveryTarget.parse("discord:99887766")

    outcome = await router._deliver_to_platform(destination, "🔇", metadata=None)

    assert len(recorder.calls) == 1
    assert outcome == {"success": True}
@pytest.mark.asyncio
async def test_env_override_enables_filter_over_config(tmp_path, monkeypatch):
    """``HERMES_FILTER_SILENCE_NARRATION=1`` forces the filter on even when
    the config switches it off."""
    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_FILTER_SILENCE_NARRATION", "1")

    recorder = RecordingAdapter()
    # Config says off; the env override is expected to force it back on.
    opted_out = GatewayConfig(filter_silence_narration=False)
    router = DeliveryRouter(opted_out, adapters={Platform.DISCORD: recorder})
    destination = DeliveryTarget.parse("discord:99887766")

    outcome = await router._deliver_to_platform(destination, "*(silent)*", metadata=None)

    assert recorder.calls == []
    assert outcome["filtered"] == "silence_narration"
@pytest.mark.asyncio
async def test_local_delivery_not_filtered(tmp_path, monkeypatch):
    """A silence token sent to the ``local`` target is still saved to a
    ``.md`` file rather than being filtered."""
    monkeypatch.setattr("gateway.delivery.get_hermes_home", lambda: tmp_path)
    monkeypatch.delenv("HERMES_FILTER_SILENCE_NARRATION", raising=False)

    router = DeliveryRouter(GatewayConfig(), adapters={})
    local_target = DeliveryTarget.parse("local")

    results = await router.deliver(
        content="*(silent)*",
        targets=[local_target],
        job_id="silence-job",
    )

    # Local path saved the file (no loop risk) and was not filtered.
    local_outcome = results["local"]
    assert local_outcome["success"] is True
    written_to = local_outcome["result"]["path"]
    assert written_to.endswith(".md")
# --- Config round-trip ------------------------------------------------------
def test_config_flag_defaults_true():
    """A default ``GatewayConfig`` has the silence filter enabled."""
    default_config = GatewayConfig()
    assert default_config.filter_silence_narration is True
def test_config_from_dict_parses_flag():
    """``from_dict`` honours an explicit ``filter_silence_narration: False``."""
    raw = {"filter_silence_narration": False}
    parsed = GatewayConfig.from_dict(raw)
    assert parsed.filter_silence_narration is False
def test_config_to_dict_roundtrip():
    """The flag survives ``to_dict`` and a ``to_dict`` -> ``from_dict`` trip."""
    original = GatewayConfig(filter_silence_narration=False)
    assert original.to_dict()["filter_silence_narration"] is False

    rebuilt = GatewayConfig.from_dict(original.to_dict())
    assert rebuilt.filter_silence_narration is False

View file

@ -0,0 +1,147 @@
"""Regression tests for #35314 — empty model on the post-interrupt recovery turn.
After a ``stream_interrupt_abort`` during an active gateway session, the recovery
turn was sometimes built with ``model=""`` (a transient config-cache miss returned
an empty ``user_config``). Every API call then failed HTTP 400 "No models
provided", "trying fallback..." was logged but never executed (the user had no
fallback configured), and the session went silent until the user re-sent.
These tests pin two fixes:
1. ``_resolve_session_agent_runtime`` caches the last successfully-resolved
model per session and recovers it when a fresh resolution comes back empty.
2. ``_has_pending_fallback`` gates the "trying fallback..." status so it is only
announced when a fallback chain actually exists.
"""
import threading
import gateway.run as gateway_run
def _make_runner():
    """Build a ``GatewayRunner`` without running ``__init__`` and attach only
    the attributes these tests set up for runtime resolution."""
    bare = object.__new__(gateway_run.GatewayRunner)
    initial_state = {
        "_session_model_overrides": {},
        "_last_resolved_model": {},
        "_service_tier": None,
        "_agent_cache": {},
        "_agent_cache_lock": threading.Lock(),
    }
    for attr, value in initial_state.items():
        setattr(bare, attr, value)
    return bare
def _patch_resolution(monkeypatch, *, model_from_config: str, provider: str = "openrouter"):
    """Stub gateway model + runtime resolution to a known state.

    ``_resolve_gateway_model`` is replaced so it returns *model_from_config*;
    ``_resolve_runtime_agent_kwargs`` is replaced so it returns a fixed set of
    runtime kwargs carrying *provider*.
    """

    def _fake_model(cfg=None):
        return model_from_config

    def _fake_runtime_kwargs():
        return {
            "provider": provider,
            "api_key": "x",
            "base_url": "https://openrouter.ai/api/v1",
            "api_mode": "chat_completions",
        }

    monkeypatch.setattr(gateway_run, "_resolve_gateway_model", _fake_model)
    monkeypatch.setattr(gateway_run, "_resolve_runtime_agent_kwargs", _fake_runtime_kwargs)
def test_normal_turn_caches_last_resolved_model(monkeypatch):
    """A successful resolution is remembered under the session key and under
    the process-wide ``"*"`` slot."""
    expected_model = "deepseek/deepseek-v4-flash"
    session = "agent:main:discord:dm:123"
    _patch_resolution(monkeypatch, model_from_config=expected_model)
    runner = _make_runner()

    resolved, _ = runner._resolve_session_agent_runtime(
        session_key=session,
        user_config={"model": {"default": "x"}},
    )

    assert resolved == expected_model
    # Cached per-session AND process-wide for first-seen-session recovery.
    assert runner._last_resolved_model[session] == expected_model
    assert runner._last_resolved_model["*"] == expected_model
def test_empty_model_recovers_session_last_good(monkeypatch):
    """When a later resolution comes back empty, the session's previously
    resolved model is reused."""
    runner = _make_runner()
    session = "agent:main:discord:dm:123"

    # Turn 1: config has the model — cache it.
    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
    runner._resolve_session_agent_runtime(
        session_key=session,
        user_config={"model": {"default": "x"}},
    )

    # Turn 2: simulate the transient empty config read (the #35314 race).
    _patch_resolution(monkeypatch, model_from_config="", provider="")
    recovered, _ = runner._resolve_session_agent_runtime(session_key=session, user_config={})

    assert recovered == "deepseek/deepseek-v4-flash", "recovery turn must reuse last-known-good, not build model=''"
def test_empty_model_new_session_recovers_global_last_good(monkeypatch):
    """A session never seen before still recovers a model from the
    process-wide ``"*"`` slot when its own resolution is empty."""
    runner = _make_runner()

    # Prime a different session so the process-wide "*" slot is populated.
    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")
    runner._resolve_session_agent_runtime(
        session_key="agent:main:discord:dm:111",
        user_config={"model": {}},
    )

    # A brand-new session that hits an empty config read still recovers via "*".
    _patch_resolution(monkeypatch, model_from_config="", provider="")
    recovered, _ = runner._resolve_session_agent_runtime(
        session_key="agent:main:discord:dm:999",
        user_config={},
    )

    assert recovered == "deepseek/deepseek-v4-flash"
def test_cold_start_empty_model_does_not_crash(monkeypatch):
    """No last-good anywhere + empty config → returns '' gracefully (no exception)."""
    _patch_resolution(monkeypatch, model_from_config="", provider="")
    fresh_runner = _make_runner()

    resolved, _ = fresh_runner._resolve_session_agent_runtime(
        session_key="agent:main:discord:dm:1",
        user_config={},
    )

    assert resolved == ""
def test_bare_runner_without_cache_attr_does_not_crash(monkeypatch):
    """Resolution works on a runner that has no ``_last_resolved_model``.

    Runners built with ``object.__new__`` (test helpers / pitfall #17) skip
    ``__init__``, so the attribute may be absent; resolution must tolerate it.
    """
    _patch_resolution(monkeypatch, model_from_config="deepseek/deepseek-v4-flash")

    bare = object.__new__(gateway_run.GatewayRunner)
    bare._session_model_overrides = {}
    bare._service_tier = None
    # Deliberately omit _last_resolved_model.

    resolved, _ = bare._resolve_session_agent_runtime(
        session_key="x",
        user_config={"model": {}},
    )

    assert resolved == "deepseek/deepseek-v4-flash"
# ── _has_pending_fallback gate ──────────────────────────────────────────────
def _bare_agent():
    """Return an ``AIAgent`` instance created without running ``__init__``."""
    import run_agent

    agent_cls = run_agent.AIAgent
    return object.__new__(agent_cls)
def test_has_pending_fallback_empty_chain():
    """An empty fallback chain means nothing is pending."""
    subject = _bare_agent()
    subject._fallback_chain, subject._fallback_index = [], 0
    assert subject._has_pending_fallback() is False
def test_has_pending_fallback_with_chain():
    """A one-entry chain with the index at 0 still has a fallback pending."""
    subject = _bare_agent()
    subject._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
    subject._fallback_index = 0
    pending = subject._has_pending_fallback()
    assert pending is True
def test_has_pending_fallback_exhausted_chain():
    """Once the index has moved past the only entry, nothing is pending."""
    subject = _bare_agent()
    subject._fallback_chain = [{"provider": "openai", "model": "gpt-5"}]
    subject._fallback_index = 1
    pending = subject._has_pending_fallback()
    assert pending is False
def test_has_pending_fallback_missing_attrs():
    """Bare agent with no fallback attributes set must default to False, not crash."""
    subject = _bare_agent()
    pending = subject._has_pending_fallback()
    assert pending is False

View file

@ -336,9 +336,35 @@ class TestEdgeCases:
paths, _ = _extract("File at /tmp/my file.png here")
assert paths == []
def test_windows_path_not_matched(self):
"""Windows-style paths should not match."""
paths, _ = _extract("See C:\\Users\\test\\image.png")
@pytest.mark.parametrize(
    "content,expected",
    [
        # Backslash separators (native Windows style)
        ("See C:\\Users\\test\\image.png here", "C:\\Users\\test\\image.png"),
        # Forward slashes with drive letter (common in cross-platform code)
        ("See C:/Users/test/image.png here", "C:/Users/test/image.png"),
        # Non-C: drive
        ("Video at D:/data/clip.mp4 ready", "D:/data/clip.mp4"),
        # Lowercase drive letter
        ("Path e:/audio/track.mp3 done", "e:/audio/track.mp3"),
    ],
)
def test_windows_drive_letter_paths_matched(self, content, expected):
    """Windows drive-letter paths (C:/..., C:\\...) must be detected (#34632).

    Earlier behaviour only anchored on (?:~/|/), so Windows absolute paths
    were dropped silently and the agent's bare-path references went out as
    plain text instead of native uploads.
    """
    found, remainder = _extract(content)
    # Exactly the one path is extracted...
    assert found == [expected]
    # ...and it no longer appears in the leftover text.
    assert expected not in remainder
def test_relative_windows_path_not_matched(self):
    """A Windows-style filename with no drive letter is not extracted.

    ``foo\\bar.png`` is treated as relative, just like its Unix sibling
    ``foo/bar.png``.
    """
    found, _ = _extract("File at foo\\bar.png here")
    assert found == []
def test_relative_path_not_matched(self):

View file

@ -361,6 +361,45 @@ class TestExtractMedia:
assert "[[audio_as_voice]]" not in cleaned
assert "[[as_document]]" not in cleaned
# Windows path support — regression coverage for #34632
def test_media_tag_windows_backslash_path(self):
    """extract_media should recognise Windows backslash paths."""
    tag = r"MEDIA:C:\Users\kotsu\file.pdf"
    extracted, _ = BasePlatformAdapter.extract_media(tag)
    assert len(extracted) == 1
    first_path = extracted[0][0]
    assert first_path.endswith("file.pdf")
def test_media_tag_windows_forward_slash_path(self):
    """extract_media should recognise Windows forward-slash paths."""
    tag = "MEDIA:C:/Users/kotsu/file.pdf"
    extracted, _ = BasePlatformAdapter.extract_media(tag)
    assert len(extracted) == 1
    first_path = extracted[0][0]
    assert first_path.endswith("file.pdf")
def test_media_tag_windows_drive_root(self):
    """extract_media should recognise a path at the drive root."""
    tag = r"MEDIA:D:\report.md"
    extracted, _ = BasePlatformAdapter.extract_media(tag)
    assert len(extracted) == 1
    first_path = extracted[0][0]
    assert first_path.endswith("report.md")
def test_media_tag_unix_paths_still_work(self):
    """Unix absolute and tilde paths must still extract after Windows change."""
    unix_tags = ("MEDIA:/tmp/audio.ogg", "MEDIA:~/docs/notes.md")
    for tag in unix_tags:
        extracted, _ = BasePlatformAdapter.extract_media(tag)
        assert len(extracted) == 1, f"Failed for: {tag}"
def test_relative_path_still_ignored(self):
    """Relative Windows-style paths (no drive letter) must not match."""
    tag = r"MEDIA:Users\kotsu\file.pdf"
    extracted, _ = BasePlatformAdapter.extract_media(tag)
    assert extracted == []
class TestMediaExtensionAllowlistParity:
"""Regression coverage for issue #34517 — the MEDIA: extension black hole.

View file

@ -294,19 +294,20 @@ class TestPlatformReconnectWatcher:
assert runner._failed_platforms[Platform.TELEGRAM]["attempts"] == 2
@pytest.mark.asyncio
async def test_reconnect_pauses_after_circuit_breaker_threshold(self):
"""After enough consecutive retryable failures, the watcher should
*pause* the platform (keep it in the queue but stop hammering it),
not drop it. The user resumes via /platform resume.
async def test_reconnect_never_auto_pauses_retryable_failures(self):
"""Retryable failures (network/DNS) must keep retrying indefinitely —
the watcher must NOT auto-pause them. Auto-pausing a transiently-failed
platform left bots silently dead after a DNS blip (#35284). The pause
circuit breaker remains available for manual /platform pause only.
"""
runner = _make_runner()
platform_config = PlatformConfig(enabled=True, token="test")
# 9 prior attempts — the next failure will be the 10th and should
# trip the circuit breaker.
# Far past the old circuit-breaker threshold (10): even after many
# consecutive retryable failures the platform must stay unpaused.
runner._failed_platforms[Platform.TELEGRAM] = {
"config": platform_config,
"attempts": 9,
"attempts": 25,
"next_retry": time.monotonic() - 1,
}
@ -332,12 +333,15 @@ class TestPlatformReconnectWatcher:
await run_one_iteration()
# Platform stays in queue — paused, not dropped
# Platform stays in queue and keeps retrying — never auto-paused.
assert Platform.TELEGRAM in runner._failed_platforms
info = runner._failed_platforms[Platform.TELEGRAM]
assert info["paused"] is True
assert info["attempts"] == 10
assert "pause_reason" in info
assert info.get("paused") is not True
assert "pause_reason" not in info
assert info["attempts"] == 26
# next_retry is pushed out by the backoff (capped at 300s), not inf.
assert info["next_retry"] != float("inf")
assert info["next_retry"] > time.monotonic()
@pytest.mark.asyncio
async def test_reconnect_skips_paused_platforms(self):

View file

@ -0,0 +1,147 @@
r"""Tests for _TOOL_MEDIA_RE regex patterns in gateway/run.py.
Issue #34632: The _TOOL_MEDIA_RE patterns in GatewayRunner used (?:/|~\/) to
anchor paths, which only matched Unix-style absolute and home-relative paths.
Windows absolute paths (C:\Users\..., D:/...) were silently ignored, causing
MEDIA directive delivery to fail on Windows.
Fix: Add [A-Za-z]:[/\\] as a third anchor alternative in both patterns.
Two identical _TOOL_MEDIA_RE patterns exist in run.py:
1. History scanning (~L17223): collects already-seen media paths
2. Result scanning (~L17549): extracts new media tags from agent output
This test file validates that both equivalent regex patterns correctly match
Windows paths while preserving existing Unix path matching behavior.
"""
import re
import pytest
# Local mirrors of the _TOOL_MEDIA_RE pattern from gateway/run.py, assembled
# from raw-string pieces. The resulting pattern text for the current regex is:
# r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:png|...))'

# Extension alternation shared by the current and the pre-fix pattern.
_MEDIA_EXT_ALTERNATION = (
    r'png|jpe?g|gif|webp|'
    r'mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|'
    r'flac|epub|pdf|zip|rar|7z|docx?|xlsx?|pptx?|'
    r'txt|csv|apk|ipa'
)

# Current pattern: drive-letter, absolute and home-relative anchors.
_TOOL_MEDIA_RE = re.compile(
    r'MEDIA:((?:[A-Za-z]:[/\\]|/|~\/)\S+\.(?:' + _MEDIA_EXT_ALTERNATION + r'))',
    re.IGNORECASE,
)

# Pre-fix pattern (no Windows drive-letter anchor), kept for regression proof.
_TOOL_MEDIA_RE_PRE_FIX = re.compile(
    r'MEDIA:((?:/|~\/)\S+\.(?:' + _MEDIA_EXT_ALTERNATION + r'))',
    re.IGNORECASE,
)
class TestToolMediaReWindowsPaths:
    """Issue #34632: _TOOL_MEDIA_RE must match Windows absolute paths."""

    # ── Positive: Windows paths now match ──────────────────────────
    @pytest.mark.parametrize("media_tag, expected_path", [
        # Windows backslash paths
        ("MEDIA:C:\\Users\\test\\image.png", "C:\\Users\\test\\image.png"),
        ("MEDIA:D:\\data\\report.pdf", "D:\\data\\report.pdf"),
        ("MEDIA:E:\\Photos\\vacation.jpg", "E:\\Photos\\vacation.jpg"),
        # Windows forward-slash paths
        ("MEDIA:C:/Users/test/image.png", "C:/Users/test/image.png"),
        ("MEDIA:D:/data/report.pdf", "D:/data/report.pdf"),
        # Mixed separators
        ("MEDIA:C:\\Users/test\\image.webp", "C:\\Users/test\\image.webp"),
        # Various extensions
        ("MEDIA:F:\\videos\\clip.mp4", "F:\\videos\\clip.mp4"),
        ("MEDIA:G:\\audio\\song.mp3", "G:\\audio\\song.mp3"),
        ("MEDIA:H:\\docs\\sheet.xlsx", "H:\\docs\\sheet.xlsx"),
        ("MEDIA:Z:\\archive\\backup.zip", "Z:\\archive\\backup.zip"),
    ])
    def test_windows_paths_match(self, media_tag, expected_path):
        """Windows absolute paths with drive letters are matched."""
        match = _TOOL_MEDIA_RE.search(media_tag)
        assert match is not None, f"Should match: {media_tag}"
        assert match.group(1) == expected_path

    # ── Positive: Unix paths still match ───────────────────────────
    @pytest.mark.parametrize("media_tag, expected_path", [
        ("MEDIA:/tmp/output.png", "/tmp/output.png"),
        ("MEDIA:/var/log/report.pdf", "/var/log/report.pdf"),
        ("MEDIA:/home/user/docs/file.txt", "/home/user/docs/file.txt"),
        # Home-relative
        ("MEDIA:~/Downloads/image.jpg", "~/Downloads/image.jpg"),
        ("MEDIA:~/Documents/report.pdf", "~/Documents/report.pdf"),
    ])
    def test_unix_paths_still_match(self, media_tag, expected_path):
        """Unix-style absolute and home-relative paths still match."""
        match = _TOOL_MEDIA_RE.search(media_tag)
        assert match is not None, f"Should match: {media_tag}"
        assert match.group(1) == expected_path

    # ── Negative: invalid paths don't match ────────────────────────
    @pytest.mark.parametrize("text", [
        "No MEDIA tag here",
        "MEDIA:relative/path/file.png",  # relative path, no anchor
        "MEDIA:file.png",  # no directory
        "MEDIA:C:file.png",  # drive letter but no separator
        "MEDIA:/path/to/file.unknown",  # unsupported extension
        "MEDIA:/path/to/file",  # no extension
        "MEDIA:",  # empty path
    ])
    def test_invalid_paths_dont_match(self, text):
        """Non-MEDIA text, relative paths, and unsupported extensions are ignored."""
        match = _TOOL_MEDIA_RE.search(text)
        assert match is None, f"Should NOT match: {text}"

    # ── Negative/preserved: old pattern rejects Windows paths ──────
    @pytest.mark.parametrize("media_tag", [
        "MEDIA:C:\\Users\\test\\image.png",
        "MEDIA:D:/data/report.pdf",
        "MEDIA:C:\\path\\file.jpg",
    ])
    def test_pre_fix_pattern_rejects_windows(self, media_tag):
        """The pre-fix pattern (without Windows anchor) does NOT match Windows paths.

        This proves the fix is necessary: without it, these paths are
        silently ignored.
        """
        match = _TOOL_MEDIA_RE_PRE_FIX.search(media_tag)
        assert match is None, f"Pre-fix pattern should NOT match: {media_tag}"

    # ── Edge cases ─────────────────────────────────────────────────
    def test_multiple_media_tags_in_content(self):
        """Multiple MEDIA tags in the same content are all found."""
        content = (
            "Some text MEDIA:C:\\path\\img.png and more MEDIA:/tmp/out.pdf trailing"
        )
        matches = list(_TOOL_MEDIA_RE.finditer(content))
        assert len(matches) == 2
        paths = [m.group(1) for m in matches]
        assert "C:\\path\\img.png" in paths
        assert "/tmp/out.pdf" in paths

    def test_case_insensitive_drive_letter(self):
        """Drive letters match in either case and the captured case is preserved.

        The anchor's ``[A-Za-z]`` class already lists both cases;
        ``re.IGNORECASE`` additionally applies to the rest of the pattern.
        """
        match_lower = _TOOL_MEDIA_RE.search("MEDIA:c:\\path\\file.png")
        match_upper = _TOOL_MEDIA_RE.search("MEDIA:C:\\path\\file.png")
        assert match_lower is not None
        assert match_upper is not None
        # Pin the exact captures rather than comparing lowercased copies, so a
        # truncated or otherwise wrong capture cannot slip through.
        assert match_lower.group(1) == "c:\\path\\file.png"
        assert match_upper.group(1) == "C:\\path\\file.png"

    @pytest.mark.parametrize("media_tag", [
        "MEDIA:C:\\path\\file.jpeg",
        "MEDIA:C:\\path\\file.JPG",
        "MEDIA:C:\\path\\file.GIF",
        "MEDIA:C:\\path\\file.MP4",
    ])
    def test_case_insensitive_extensions(self, media_tag):
        """File extensions are matched case-insensitively."""
        match = _TOOL_MEDIA_RE.search(media_tag)
        assert match is not None, f"Should match: {media_tag}"

View file

@ -97,7 +97,7 @@ async def test_status_command_reports_running_agent_without_interrupt(monkeypatc
result = await runner._handle_message(_make_event("/status"))
assert "**Session ID:** `sess-1`" in result
assert "**Tokens:** 321" in result
assert "**Cumulative API tokens (re-sent each call):** 321" in result
assert "**Agent Running:** Yes ⚡" in result
assert "**Title:**" not in result
running_agent.interrupt.assert_not_called()
@ -150,7 +150,7 @@ async def test_status_command_reads_token_totals_from_session_db():
result = await runner._handle_message(_make_event("/status"))
# 1000 + 250 + 500 + 100 + 50 = 1,900
assert "**Tokens:** 1,900" in result
assert "**Cumulative API tokens (re-sent each call):** 1,900" in result
@pytest.mark.asyncio
@ -171,7 +171,7 @@ async def test_status_command_tokens_zero_when_session_db_row_missing():
result = await runner._handle_message(_make_event("/status"))
assert "**Tokens:** 0" in result
assert "**Cumulative API tokens (re-sent each call):** 0" in result
@pytest.mark.asyncio

View file

@ -146,6 +146,78 @@ class TestTelegramModelPicker:
# State is cleaned up after a successful switch.
assert "12345" not in adapter._model_picker_state
@pytest.mark.asyncio
async def test_provider_group_folds_and_drills_down(self, monkeypatch):
    """Provider families fold into one group button and expand on tap.

    At the top level a family (e.g. MiniMax) is shown as a single ``mpg:``
    button; tapping it lists the family's members as ``mp:`` buttons. A
    group with only one member gets no submenu, just a direct ``mp:``
    button.

    The test records the ``callback_data`` of every InlineKeyboardButton
    that gets built. That works whether ``telegram`` is the real SDK or the
    module mock (under the mock the SDK markup objects don't expose a plain
    iterable).
    """
    import gateway.platforms.telegram as tg

    seen_callbacks: list = []

    class _RecordingButton:
        def __init__(self, text, callback_data=None, **kw):
            self.text = text
            self.callback_data = callback_data
            seen_callbacks.append(callback_data)

    class _RecordingMarkup:
        def __init__(self, rows):
            self.inline_keyboard = rows

    monkeypatch.setattr(tg, "InlineKeyboardButton", _RecordingButton)
    monkeypatch.setattr(tg, "InlineKeyboardMarkup", _RecordingMarkup)

    adapter = _make_adapter()

    async def _fake_send_message(**kwargs):
        return SimpleNamespace(message_id=101)

    adapter._bot.send_message = AsyncMock(side_effect=_fake_send_message)

    provider_rows = [
        {"slug": "minimax", "name": "MiniMax", "total_models": 2},
        {"slug": "minimax-cn", "name": "MiniMax (China)", "total_models": 3},
        {"slug": "xai", "name": "xAI", "total_models": 1},  # lone group member
    ]

    await adapter.send_model_picker(
        chat_id="12345",
        providers=provider_rows,
        current_model="m",
        current_provider="minimax",
        session_key="s",
        on_model_selected=AsyncMock(),
        metadata=None,
    )

    # Top-level keyboard: MiniMax family folded into one group button;
    # xai (lone member) degraded to a direct provider button.
    assert "mpg:minimax" in seen_callbacks
    assert "mp:xai" in seen_callbacks
    for folded in ("mp:minimax", "mp:minimax-cn"):
        assert folded not in seen_callbacks

    # Drill into the MiniMax group → members appear as mp: buttons + back.
    seen_callbacks.clear()
    callback_query = AsyncMock()
    callback_query.message = MagicMock()
    callback_query.message.chat_id = 12345
    callback_query.answer = AsyncMock()
    callback_query.edit_message_text = AsyncMock()

    await adapter._handle_model_picker_callback(callback_query, "mpg:minimax", "12345")

    assert "mp:minimax" in seen_callbacks
    assert "mp:minimax-cn" in seen_callbacks
    assert "mb" in seen_callbacks  # back-to-providers button present
@pytest.mark.asyncio
async def test_retries_without_thread_when_thread_not_found(self):
adapter = _make_adapter()

View file

@ -11,6 +11,7 @@ import pytest
from gateway.config import PlatformConfig
from gateway.config import GatewayConfig, HomeChannel, Platform, _apply_env_overrides
from gateway.platforms.base import SendResult
from gateway.platforms.base import MessageEvent, MessageType
from gateway.platforms import weixin
from gateway.platforms.weixin import ContextTokenStore, WeixinAdapter
from tools.send_message_tool import _parse_target_ref, _send_to_platform
@ -853,15 +854,27 @@ class TestWeixinContentDedup:
adapter = _make_adapter()
adapter._poll_session = object()
adapter.handle_message = AsyncMock()
# Tighten the text-debounce delay so the flush completes quickly.
adapter._text_batch_delay_seconds = 0.05
adapter._text_batch_split_delay_seconds = 0.05
base_msg = {
"from_user_id": "wxid_user1",
"item_list": [{"type": 1, "text_item": {"text": "hello world"}}],
}
asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-1"}))
asyncio.run(adapter._process_message({**base_msg, "message_id": "msg-2"}))
async def _drive():
# Both inbound messages share the same event loop so the debounce
# task created by the first one survives to be flushed.
await adapter._process_message({**base_msg, "message_id": "msg-1"})
await adapter._process_message({**base_msg, "message_id": "msg-2"})
# Wait out the quiet period so the buffered text batch flushes.
await asyncio.sleep(0.2)
asyncio.run(_drive())
# Content-dedup drops the second (duplicate) message before it is even
# enqueued, so only one combined dispatch reaches handle_message.
assert adapter.handle_message.await_count == 1
event = adapter.handle_message.await_args[0][0]
assert event.text == "hello world"
@ -882,3 +895,76 @@ class TestWeixinContentDedup:
assert adapter.handle_message.await_count == 0
# is_duplicate should only be called for message_id, never for content
assert all("content:" not in str(call) for call in adapter._dedup.is_duplicate.call_args_list)
class TestWeixinTextDebounce:
    """Text-debounce batching for rapid multi-message bursts (issue #35301).

    The batch delays come from ``config.extra`` (config.yaml), not env vars.
    """

    @staticmethod
    def _adapter_with_delays(delay, split_delay):
        """Build a WeixinAdapter whose ``extra`` carries the given delays."""
        extra = {
            "account_id": "test-account",
            "text_batch_delay_seconds": delay,
            "text_batch_split_delay_seconds": split_delay,
        }
        return WeixinAdapter(
            PlatformConfig(enabled=True, token="test-token", extra=extra)
        )

    def test_batch_delays_default_from_config(self):
        """Without overrides the delays are 3.0s and 5.0s."""
        adapter = _make_adapter()
        assert adapter._text_batch_delay_seconds == 3.0
        assert adapter._text_batch_split_delay_seconds == 5.0

    def test_batch_delays_overridden_via_config_extra(self):
        """String and numeric values in ``extra`` are both accepted."""
        adapter = self._adapter_with_delays("0.5", 1.5)
        assert adapter._text_batch_delay_seconds == 0.5
        assert adapter._text_batch_split_delay_seconds == 1.5

    def test_invalid_config_value_falls_back_to_default(self):
        """A non-numeric string and a negative number both yield the defaults."""
        adapter = self._adapter_with_delays("not-a-number", -4)
        assert adapter._text_batch_delay_seconds == 3.0
        assert adapter._text_batch_split_delay_seconds == 5.0

    def test_rapid_texts_collapse_into_single_dispatch(self):
        """Three texts enqueued back-to-back reach the handler as one event
        whose text is the three parts joined by newlines."""
        adapter = _make_adapter()
        # Short delays so the flush happens within the sleep below.
        adapter._text_batch_delay_seconds = 0.05
        adapter._text_batch_split_delay_seconds = 0.05

        delivered = []

        async def _capture(event):
            delivered.append(event.text)

        adapter.handle_message = _capture

        def _text_event(text):
            source = adapter.build_source(
                chat_id="wxid_user1", chat_type="dm",
                user_id="wxid_user1", user_name="wxid_user1",
            )
            return MessageEvent(
                text=text,
                message_type=MessageType.TEXT,
                source=source,
            )

        async def _drive():
            for part in ("one", "two", "three"):
                adapter._enqueue_text_event(_text_event(part))
            assert delivered == []  # nothing flushed during the burst
            await asyncio.sleep(0.2)

        asyncio.run(_drive())

        assert delivered == ["one\ntwo\nthree"]

View file

@ -0,0 +1,107 @@
"""Text-debounce batching for the WhatsApp adapter (issue #35301).
WhatsApp delivers rapid multi-message bursts (forwarded batches, paste-splits)
individually. Without debounce each fragment triggers a separate agent
invocation, wasting tokens and flooding the user with reply fragments. This
mirrors the Telegram/WeCom/Feishu pattern.
Batch delays are read from ``config.extra`` (config.yaml), not env vars.
"""
import asyncio
from gateway.config import Platform, PlatformConfig
from gateway.platforms.base import MessageEvent, MessageType
from gateway.platforms.whatsapp import WhatsAppAdapter
from gateway.session import SessionSource
def _make_adapter(**extra):
    """Build a ``WhatsAppAdapter`` whose ``config.extra`` is a test session.

    Keyword arguments are merged over the base ``{"session_name": "test"}``
    mapping, so callers can override or add ``extra`` entries.
    """
    settings = {"session_name": "test", **extra}
    return WhatsAppAdapter(PlatformConfig(enabled=True, extra=settings))
def _event(text):
    """Return a TEXT ``MessageEvent`` carrying *text* from a fixed WhatsApp DM."""
    source = SessionSource(
        platform=Platform.WHATSAPP,
        chat_id="chat123",
        chat_type="dm",
        user_id="user1",
        user_name="tester",
    )
    return MessageEvent(text=text, message_type=MessageType.TEXT, source=source)
def test_batch_delays_default_from_config():
    """Without overrides the adapter reports 5.0s / 10.0s batch delays."""
    whatsapp = _make_adapter()
    assert whatsapp._text_batch_delay_seconds == 5.0
    assert whatsapp._text_batch_split_delay_seconds == 10.0
def test_batch_delays_overridden_via_config_extra():
    """A numeric string and an int in ``extra`` are both honoured as floats."""
    overrides = {
        "text_batch_delay_seconds": "2.5",
        "text_batch_split_delay_seconds": 7,
    }
    whatsapp = _make_adapter(**overrides)
    assert whatsapp._text_batch_delay_seconds == 2.5
    assert whatsapp._text_batch_split_delay_seconds == 7.0
def test_invalid_config_value_falls_back_to_default():
    """Non-numeric and negative delay values revert to the 5.0s / 10.0s defaults."""
    bad_values = {
        "text_batch_delay_seconds": "garbage",
        "text_batch_split_delay_seconds": -3,
    }
    whatsapp = _make_adapter(**bad_values)
    assert whatsapp._text_batch_delay_seconds == 5.0
    assert whatsapp._text_batch_split_delay_seconds == 10.0
def test_env_var_is_ignored(monkeypatch):
    """Setting the legacy HERMES_* env var must leave the delay at its default."""
    # Delays are config-only: the environment must NOT influence them.
    monkeypatch.setenv("HERMES_WHATSAPP_TEXT_BATCH_DELAY_SECONDS", "99")
    whatsapp = _make_adapter()
    assert whatsapp._text_batch_delay_seconds == 5.0
def test_rapid_texts_collapse_into_single_dispatch():
    """Three texts enqueued back-to-back are delivered as one joined event."""
    whatsapp = _make_adapter(
        text_batch_delay_seconds=0.05,
        text_batch_split_delay_seconds=0.05,
    )
    seen = []

    async def _record(event):
        seen.append(event.text)

    whatsapp.handle_message = _record

    async def _burst():
        for body in ("one", "two", "three"):
            whatsapp._enqueue_text_event(_event(body))
        assert seen == []  # nothing flushed during the burst
        # Sleep past the quiet period so the buffered batch is flushed.
        await asyncio.sleep(0.2)

    asyncio.run(_burst())
    assert seen == ["one\ntwo\nthree"]
def test_lone_message_dispatched_alone():
    """A single text with no follow-up is dispatched unchanged after the delay."""
    whatsapp = _make_adapter(
        text_batch_delay_seconds=0.05,
        text_batch_split_delay_seconds=0.05,
    )
    seen = []

    async def _record(event):
        seen.append(event.text)

    whatsapp.handle_message = _record

    async def _single():
        whatsapp._enqueue_text_event(_event("solo"))
        await asyncio.sleep(0.2)

    asyncio.run(_single())
    assert seen == ["solo"]

View file

@ -39,6 +39,45 @@ def mock_args():
return SimpleNamespace()
class TestCmdUpdatePip:
    """Regression tests for pip-install update flows (``_cmd_update_pip``)."""

    @patch("shutil.which", return_value="/usr/bin/uv")
    @patch("subprocess.run")
    def test_update_pip_exports_virtualenv_from_sys_prefix(
        self, mock_run, _mock_which, mock_args, monkeypatch
    ):
        """When ``sys.prefix`` differs from ``sys.base_prefix`` the uv call
        receives an ``env`` whose ``VIRTUAL_ENV`` is ``sys.prefix``."""
        from hermes_cli import main as hm

        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
        monkeypatch.delenv("VIRTUAL_ENV", raising=False)
        # prefix != base_prefix models running inside a virtual environment.
        monkeypatch.setattr(hm.sys, "prefix", "/tmp/hermes-launcher-venv")
        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")

        hm._cmd_update_pip(mock_args)

        assert mock_run.call_count == 1
        invocation = mock_run.call_args
        assert invocation.args[0] == ["/usr/bin/uv", "pip", "install", "--upgrade", "hermes-agent"]
        assert invocation.kwargs["env"]["VIRTUAL_ENV"] == "/tmp/hermes-launcher-venv"

    @patch("shutil.which", return_value="/usr/bin/uv")
    @patch("subprocess.run")
    def test_update_pip_does_not_export_virtualenv_for_system_python(
        self, mock_run, _mock_which, mock_args, monkeypatch
    ):
        """When ``sys.prefix`` equals ``sys.base_prefix`` no ``env`` keyword
        is passed to ``subprocess.run``."""
        from hermes_cli import main as hm

        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
        monkeypatch.delenv("VIRTUAL_ENV", raising=False)
        # prefix == base_prefix models a system (non-venv) interpreter.
        monkeypatch.setattr(hm.sys, "prefix", "/usr")
        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")

        hm._cmd_update_pip(mock_args)

        assert mock_run.call_count == 1
        passed_kwargs = mock_run.call_args.kwargs
        assert "env" not in passed_kwargs
class TestCmdUpdateBranchFallback:
"""cmd_update falls back to main when current branch has no remote counterpart."""

View file

@ -6,25 +6,6 @@ from unittest.mock import patch
from hermes_cli.model_switch import list_authenticated_providers
@patch.dict(os.environ, {"GH_TOKEN": "test-key"}, clear=False)
def test_copilot_picker_keeps_curated_copilot_models_when_live_catalog_unavailable():
    """With the live GitHub catalog returning ``None`` the copilot entry still
    lists the curated models, and omits ``claude-opus-4.6``."""
    with patch("agent.models_dev.fetch_models_dev", return_value={}), \
         patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \
         patch("hermes_cli.models._fetch_github_models", return_value=None):
        providers = list_authenticated_providers(current_provider="openrouter", max_models=50)

    copilot = next((entry for entry in providers if entry["slug"] == "copilot"), None)
    assert copilot is not None

    curated = (
        "gpt-5.4",
        "claude-sonnet-4.6",
        "claude-sonnet-4",
        "claude-sonnet-4.5",
        "claude-haiku-4.5",
        "gemini-3.1-pro-preview",
    )
    for model_id in curated:
        assert model_id in copilot["models"]
    assert "claude-opus-4.6" not in copilot["models"]
@patch.dict(os.environ, {"GH_TOKEN": "test-key"}, clear=False)
def test_copilot_picker_uses_live_catalog_when_available():
live_models = ["gpt-5.4", "claude-sonnet-4.6", "gemini-3.1-pro-preview"]

View file

@ -80,6 +80,25 @@ def loopback_app():
web_server.app.state.auth_required = prev_required
@pytest.fixture
def insecure_public_app():
    """Yield a ``TestClient`` for ``web_server.app`` in all-interfaces insecure mode.

    The app state is set to host ``0.0.0.0``, port 9120 and
    ``auth_required=False`` for the duration of the test; the previous values
    are put back afterwards.
    """
    _reset_for_tests()
    clear_providers()
    tracked = ("bound_host", "bound_port", "auth_required")
    # Snapshot whatever was configured before (None when the attribute is unset).
    saved = {name: getattr(web_server.app.state, name, None) for name in tracked}
    web_server.app.state.bound_host = "0.0.0.0"
    web_server.app.state.bound_port = 9120
    web_server.app.state.auth_required = False
    yield TestClient(web_server.app, base_url="http://192.168.0.222:9120")
    _reset_for_tests()
    for name in tracked:
        setattr(web_server.app.state, name, saved[name])
def _logged_in(client: TestClient) -> None:
"""Drive the stub OAuth round trip so the client holds session cookies."""
r1 = client.get("/auth/login?provider=stub", follow_redirects=False)
@ -143,6 +162,30 @@ class TestWsTicketEndpoint:
# ---------------------------------------------------------------------------
@pytest.fixture
def insecure_explicit_host_app():
    """Yield a ``TestClient`` for ``web_server.app`` bound to an explicit
    non-loopback host (--insecure).

    Models `--host 100.64.0.10 --insecure` (e.g. a Tailscale IP behind
    `tailscale serve`) — a specific address rather than the all-interfaces
    0.0.0.0 wildcard. The previous app-state values are restored afterwards.
    """
    _reset_for_tests()
    clear_providers()
    tracked = ("bound_host", "bound_port", "auth_required")
    # Snapshot whatever was configured before (None when the attribute is unset).
    saved = {name: getattr(web_server.app.state, name, None) for name in tracked}
    web_server.app.state.bound_host = "100.64.0.10"
    web_server.app.state.bound_port = 9119
    web_server.app.state.auth_required = False
    yield TestClient(web_server.app, base_url="http://100.64.0.10:9119")
    _reset_for_tests()
    for name in tracked:
        setattr(web_server.app.state, name, saved[name])
def _fake_ws(*, query: dict, client_host: str = "127.0.0.1", path: str = "/api/pty"):
"""Build a stand-in for starlette.WebSocket good enough for _ws_auth_ok."""
@ -281,6 +324,48 @@ class TestWsRequestIsAllowedGated:
ws.headers = {"host": "127.0.0.1:8080"}
assert web_server._ws_request_is_allowed(ws) is True
def test_non_loopback_peer_allowed_in_insecure_public_mode(self, insecure_public_app):
    """`--host 0.0.0.0 --insecure` is an explicit LAN/public opt-in.

    Regression coverage for the dashboard `/chat` breakage: the HTML shell
    loaded on 9120, but every WebSocket upgrade was rejected with 403
    because the loopback-only peer guard still ran even though the operator
    had intentionally exposed the dashboard on all interfaces.
    """
    lan_headers = {
        "host": "192.168.0.222:9120",
        "origin": "http://192.168.0.222:9120",
    }
    ws = _fake_ws(query={}, client_host="192.168.0.55")
    ws.headers = lan_headers
    allowed = web_server._ws_request_is_allowed(ws)
    assert allowed is True
def test_peer_allowed_on_explicit_non_loopback_bind(self, insecure_explicit_host_app):
    """`--host 100.64.0.10 --insecure` (Tailscale/LAN IP) is an explicit
    non-loopback opt-in too — not just the 0.0.0.0 wildcard.

    Regression coverage: the merged 0.0.0.0/:: fix did not cover binding
    directly to a specific tailnet/LAN address, so the `/chat` HTML loaded
    but WS upgrades were still rejected by the loopback-only peer guard.
    """
    tailnet_headers = {
        "host": "100.64.0.10:9119",
        "origin": "http://100.64.0.10:9119",
    }
    ws = _fake_ws(query={}, client_host="100.64.0.99")
    ws.headers = tailnet_headers
    allowed = web_server._ws_request_is_allowed(ws)
    assert allowed is True
def test_rebinding_host_rejected_on_explicit_non_loopback_bind(
    self, insecure_explicit_host_app
):
    """Lifting the peer-IP gate for an explicit bind must NOT lift the
    DNS-rebinding Host guard.

    A mismatched Host header is still rejected, because an explicit
    non-loopback bind requires an exact Host match in `_is_accepted_host`
    (unlike the 0.0.0.0 wildcard, which accepts any).
    """
    ws = _fake_ws(query={}, client_host="100.64.0.99")
    # Host does not match the bound address, so the request must be refused.
    ws.headers = {"host": "evil.example.com"}
    allowed = web_server._ws_request_is_allowed(ws)
    assert allowed is False
def test_host_origin_guard_still_runs_in_gated_mode(self, gated_app):
"""Bypassing the peer-IP check must not bypass the DNS-rebinding
Host header guard that one still protects against attacker

View file

@ -80,14 +80,6 @@ class TestGmiConfigRegistry:
class TestGmiModelCatalog:
def test_static_model_fallback_exists(self):
    """``_PROVIDER_MODELS`` has a ``gmi`` entry containing the expected model ids."""
    assert "gmi" in _PROVIDER_MODELS
    gmi_models = _PROVIDER_MODELS["gmi"]
    expected_ids = (
        "zai-org/GLM-5.1-FP8",
        "deepseek-ai/DeepSeek-V3.2",
        "moonshotai/Kimi-K2.5",
        "anthropic/claude-sonnet-4.6",
    )
    for model_id in expected_ids:
        assert model_id in gmi_models
def test_canonical_provider_entry(self):
slugs = [p.slug for p in CANONICAL_PROVIDERS]
assert "gmi" in slugs
@ -267,11 +259,6 @@ class TestGmiModelMetadata:
class TestGmiAuxiliary:
def test_aux_default_model(self):
    """The auxiliary default model for ``gmi`` is the Gemini flash-lite preview."""
    from agent.auxiliary_client import _get_aux_model_for_provider

    aux_model = _get_aux_model_for_provider("gmi")
    assert aux_model == "google/gemini-3.1-flash-lite-preview"
def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")

View file

@ -106,20 +106,30 @@ def test_worker_block_on_child_with_done_parents_is_still_sticky(kanban_home: Pa
def test_circuit_breaker_block_still_auto_promotes(kanban_home: Path) -> None:
"""A child that was put into ``blocked`` *without* a worker-issued
``kanban_block`` (e.g. circuit-breaker after repeated spawn
failures, manual DB triage) must still get auto-promoted when its
parents complete preserves the pre-#28712 recovery semantics."""
``kanban_block`` (e.g. a transient crash, manual DB triage) and whose
``consecutive_failures`` is still *below* the circuit-breaker limit
must get auto-promoted when its parents complete — this preserves the
pre-#28712 recovery semantics for genuinely transient failures.
The complementary case — a block whose failure count has *reached*
the limit must stay blocked — is covered by
``test_kanban_db.py::test_recompute_ready_skips_tasks_at_failure_limit``
(#35072). Together they pin the contract: ``recompute_ready`` defers
the give-up decision to the same effective limit the breaker uses, so
the two never disagree.
"""
with kb.connect() as conn:
parent = kb.create_task(conn, title="parent")
child = kb.create_task(conn, title="child", parents=[parent])
kb.complete_task(conn, parent, result="ok")
# Simulate a circuit-breaker / direct triage that flips status
# without emitting a ``blocked`` event — exactly what
# ``_record_task_failure`` does after a ``gave_up``.
# Simulate a transient circuit-breaker / direct triage that flips
# status without emitting a ``blocked`` event — exactly what
# ``_record_task_failure`` does below the limit. One failure is
# under the default limit (2), so recovery is still correct.
conn.execute(
"UPDATE tasks SET status='blocked', consecutive_failures=5, "
"last_failure_error='persistent error' WHERE id=?",
"UPDATE tasks SET status='blocked', consecutive_failures=1, "
"last_failure_error='transient error' WHERE id=?",
(child,),
)
conn.commit()
@ -128,8 +138,9 @@ def test_circuit_breaker_block_still_auto_promotes(kanban_home: Path) -> None:
assert promoted == 1
task = kb.get_task(conn, child)
assert task.status == "ready"
assert task.consecutive_failures == 0
assert task.last_failure_error is None
# Counter is preserved across recovery (not reset) so the breaker
# can still accumulate if the task keeps failing (#35072).
assert task.consecutive_failures == 1
def test_gave_up_event_alone_does_not_make_block_sticky(kanban_home: Path) -> None:

View file

@ -307,7 +307,8 @@ def test_recompute_ready_cascades_through_chain(kanban_home):
def test_recompute_ready_promotes_blocked_with_done_parents(kanban_home):
"""blocked tasks with all parents done should be promoted to ready."""
"""blocked tasks with all parents done should be promoted to ready,
unless the circuit-breaker failure limit has been reached."""
with kb.connect() as conn:
parent = kb.create_task(conn, title="parent", assignee="a")
child = kb.create_task(
@ -316,16 +317,16 @@ def test_recompute_ready_promotes_blocked_with_done_parents(kanban_home):
# Complete the parent
kb.claim_task(conn, parent)
kb.complete_task(conn, parent, result="ok")
# Manually block the child (simulates a worker that failed
# after the parent finished)
# Manually block the child with zero failures (simulates a
# dependency block, not a circuit-breaker block).
conn.execute(
"UPDATE tasks SET status='blocked', consecutive_failures=5, "
"last_failure_error='persistent error' WHERE id=?",
"UPDATE tasks SET status='blocked', consecutive_failures=0, "
"last_failure_error=NULL WHERE id=?",
(child,),
)
conn.commit()
assert kb.get_task(conn, child).status == "blocked"
# recompute_ready should promote blocked → ready and reset failures
# recompute_ready should promote blocked → ready
promoted = kb.recompute_ready(conn)
assert promoted == 1
task = kb.get_task(conn, child)
@ -815,6 +816,149 @@ def test_unblock_resets_failure_counters(kanban_home):
assert task.last_failure_error is None
def test_recompute_ready_skips_tasks_at_failure_limit(kanban_home):
    """recompute_ready must not auto-recover tasks whose consecutive_failures
    has reached the circuit-breaker limit (#35072).

    Without this guard, a task that repeatedly exhausts its iteration
    budget would cycle forever: block → auto-recover (counter reset) →
    respawn → budget exhausted → block → ...
    """
    with kb.connect() as conn:
        parent_id = kb.create_task(conn, title="parent", assignee="a")
        child_id = kb.create_task(
            conn, title="child", assignee="a", parents=[parent_id]
        )

        # Finish the parent so the child's dependencies are satisfied.
        kb.claim_task(conn, parent_id)
        kb.complete_task(conn, parent_id, summary="done")

        # Two budget exhaustions in a row reach the failure limit (2).
        kb.claim_task(conn, child_id)
        for message in ("budget exhausted 1", "budget exhausted 2"):
            kb._record_task_failure(
                conn,
                child_id,
                error=message,
                outcome="timed_out",
                release_claim=True,
                end_run=True,
                failure_limit=2,
            )

        tripped = kb.get_task(conn, child_id)
        assert tripped.status == "blocked"
        assert tripped.consecutive_failures >= 2

        # The breaker has tripped: recompute_ready must NOT promote the
        # task, and it has to stay blocked.
        assert kb.recompute_ready(conn) == 0
        assert kb.get_task(conn, child_id).status == "blocked"

        # An explicit unblock still works and resets the counter.
        assert kb.unblock_task(conn, child_id)
        released = kb.get_task(conn, child_id)
        assert released.status == "ready"
        assert released.consecutive_failures == 0
def test_recompute_ready_recovers_below_limit(kanban_home):
    """recompute_ready auto-recovers blocked tasks that haven't hit the
    failure limit yet — and the counter is preserved across recovery."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="task", assignee="a")
        kb.claim_task(conn, task_id)

        # A single failure stays below the limit of 2.
        kb._record_task_failure(
            conn,
            task_id,
            error="budget exhausted 1",
            outcome="timed_out",
            release_claim=True,
            end_run=True,
            failure_limit=2,
        )
        after_failure = kb.get_task(conn, task_id)
        assert after_failure.status == "ready"
        assert after_failure.consecutive_failures == 1

        # Block it by some other means (not the circuit breaker).
        conn.execute(
            "UPDATE tasks SET status = 'blocked' WHERE id = ?", (task_id,),
        )
        conn.commit()

        assert kb.recompute_ready(conn) == 1
        recovered = kb.get_task(conn, task_id)
        assert recovered.status == "ready"
        # The counter must survive the recovery rather than being reset.
        assert recovered.consecutive_failures == 1
def test_recompute_ready_honours_dispatcher_failure_limit(kanban_home):
    """The guard's effective limit must follow the same resolution order
    as the circuit breaker (#35072): per-task max_retries → dispatcher
    failure_limit → DEFAULT_FAILURE_LIMIT.

    Without threading the dispatcher's ``kanban.failure_limit`` through,
    the guard falls back to DEFAULT_FAILURE_LIMIT and disagrees with the
    breaker — sticking a task prematurely (config limit > default) or
    letting a tripped task escape (config limit < default).
    """
    default_limit = kb.DEFAULT_FAILURE_LIMIT
    with kb.connect() as conn:
        # Case 1: config allows MORE retries than the default. A task
        # blocked below the configured limit must still recover.
        lenient_id = kb.create_task(conn, title="lenient", assignee="a")
        conn.execute(
            "UPDATE tasks SET status='blocked', consecutive_failures=? "
            "WHERE id=?",
            (default_limit, lenient_id),
        )
        conn.commit()

        # With the default limit the task sticks (failures >= default).
        assert kb.recompute_ready(conn) == 0
        assert kb.get_task(conn, lenient_id).status == "blocked"

        # A higher dispatcher limit recovers it and keeps the counter.
        assert kb.recompute_ready(conn, failure_limit=default_limit + 2) == 1
        lenient = kb.get_task(conn, lenient_id)
        assert lenient.status == "ready"
        assert lenient.consecutive_failures == default_limit

        # Case 2: config allows FEWER retries than the default. A task at
        # the stricter limit must stay blocked even though it is below
        # the default.
        strict_id = kb.create_task(conn, title="strict", assignee="a")
        conn.execute(
            "UPDATE tasks SET status='blocked', consecutive_failures=1 "
            "WHERE id=?",
            (strict_id,),
        )
        conn.commit()

        # The default limit (2) would recover it (1 < 2); the stricter
        # config limit (1) must keep it blocked (1 >= 1).
        assert kb.recompute_ready(conn, failure_limit=1) == 0
        assert kb.get_task(conn, strict_id).status == "blocked"
def test_recompute_ready_per_task_max_retries_overrides_dispatcher(kanban_home):
    """A per-task ``max_retries`` wins over the dispatcher failure_limit,
    matching ``_record_task_failure``'s resolution order."""
    with kb.connect() as conn:
        task_id = kb.create_task(conn, title="per-task", assignee="a")

        # The task itself allows 4 retries while the dispatcher says 2.
        conn.execute(
            "UPDATE tasks SET status='blocked', consecutive_failures=2, "
            "max_retries=4 WHERE id=?",
            (task_id,),
        )
        conn.commit()

        # failures (2) < per-task limit (4), so it recovers despite the
        # dispatcher limit of 2.
        assert kb.recompute_ready(conn, failure_limit=2) == 1
        recovered = kb.get_task(conn, task_id)
        assert recovered.status == "ready"
        assert recovered.consecutive_failures == 2
# ---------------------------------------------------------------------------
# Parent-completion invariant at the claim gate (RCA t_a6acd07d)
# ---------------------------------------------------------------------------

View file

@ -1,11 +1,74 @@
from __future__ import annotations
import sqlite3
import threading
from pathlib import Path
from hermes_cli import kanban_db as kb
def _make_legacy_db(path: Path) -> None:
    """Create a kanban DB at *path* using the pre-AUTOINCREMENT (TEXT PK) schema.

    Only the four tables #35096 affects are downgraded; every other table
    comes from the current ``kb.SCHEMA_SQL`` so the additive-column migration
    runs cleanly on top. One task plus a comment, two events, a run and a
    notify subscription are seeded.
    """
    legacy_tables = """
        DROP TABLE task_events;
        DROP TABLE task_comments;
        DROP TABLE task_runs;
        DROP TABLE kanban_notify_subs;
        CREATE TABLE task_comments (id TEXT PRIMARY KEY, task_id TEXT NOT NULL,
            author TEXT NOT NULL, body TEXT NOT NULL, created_at INTEGER NOT NULL);
        CREATE TABLE task_events (id TEXT PRIMARY KEY, task_id TEXT NOT NULL,
            kind TEXT NOT NULL, payload TEXT, created_at INTEGER NOT NULL);
        CREATE TABLE task_runs (id TEXT PRIMARY KEY, task_id TEXT NOT NULL,
            profile TEXT, status TEXT NOT NULL, started_at INTEGER NOT NULL);
        CREATE TABLE kanban_notify_subs (task_id TEXT NOT NULL, platform TEXT NOT NULL,
            chat_id TEXT NOT NULL, thread_id TEXT NOT NULL DEFAULT '', user_id TEXT,
            created_at INTEGER NOT NULL, last_event_id TEXT,
            PRIMARY KEY (task_id, platform, chat_id, thread_id));
        """
    seed_rows = (
        "INSERT INTO tasks (id, title, status, created_at) VALUES ('task-1', 'T', 'done', 1000)",
        "INSERT INTO task_comments VALUES ('c-1', 'task-1', 'agent', 'hi', 1500)",
        "INSERT INTO task_events VALUES ('e-1', 'task-1', 'completed', NULL, 2000)",
        "INSERT INTO task_events VALUES ('e-2', 'task-1', 'blocked', NULL, 2100)",
        "INSERT INTO task_runs VALUES ('r-1', 'task-1', 'default', 'done', 1000)",
        "INSERT INTO kanban_notify_subs (task_id, platform, chat_id, created_at, last_event_id) "
        "VALUES ('task-1', 'telegram', '123', 1000, 'e-1')",
    )

    db = sqlite3.connect(str(path))
    # Start from the current schema, then swap in the legacy tables.
    db.executescript(kb.SCHEMA_SQL)
    db.executescript(legacy_tables)
    for statement in seed_rows:
        db.execute(statement)
    db.commit()
    db.close()
def _setup_home(tmp_path, monkeypatch) -> Path:
    """Point HERMES_HOME and ``Path.home`` at *tmp_path* and return the DB
    path of the ``legacy`` board.

    The DB's parent directory is created, and the path is dropped from
    ``kb._INITIALIZED_PATHS``.
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    legacy_db = kb.kanban_db_path(board="legacy")
    legacy_db.parent.mkdir(parents=True, exist_ok=True)
    resolved = str(legacy_db.resolve())
    kb._INITIALIZED_PATHS.discard(resolved)
    return legacy_db
def _table_struct(conn: sqlite3.Connection, table: str):
cols = [
(r["name"], (r["type"] or "").upper(), r["notnull"], r["pk"])
for r in conn.execute(f"PRAGMA table_info({table})")
]
idx = sorted(
r["name"]
for r in conn.execute(f"PRAGMA index_list({table})")
if not r["name"].startswith("sqlite_")
)
return cols, idx
def test_connect_initialization_is_thread_safe(tmp_path, monkeypatch):
home = tmp_path / ".hermes"
home.mkdir()
@ -36,3 +99,79 @@ def test_connect_initialization_is_thread_safe(tmp_path, monkeypatch):
with kb.connect(board="default") as conn:
cols = {row["name"] for row in conn.execute("PRAGMA table_info(tasks)")}
assert "max_retries" in cols
def test_legacy_text_pk_tables_rebuilt_to_integer_autoincrement(tmp_path, monkeypatch):
    """A pre-AUTOINCREMENT DB is migrated in place: id columns become INTEGER
    PKs, ``last_event_id`` becomes INTEGER, data is preserved, and indexes
    are recreated (DROP TABLE would otherwise take them down)."""
    db_path = _setup_home(tmp_path, monkeypatch)
    _make_legacy_db(db_path)

    def _columns_by_name(conn, table):
        return {row["name"]: row for row in conn.execute(f"PRAGMA table_info({table})")}

    with kb.connect(db_path) as conn:
        for table in ("task_events", "task_comments", "task_runs"):
            id_col = _columns_by_name(conn, table)["id"]
            assert id_col["type"].upper() == "INTEGER" and id_col["pk"] == 1

        subs_cols = _columns_by_name(conn, "kanban_notify_subs")
        assert subs_cols["last_event_id"]["type"].upper() == "INTEGER"

        # The rebuild must not lose rows.
        assert len(conn.execute("SELECT * FROM task_events").fetchall()) == 2
        assert conn.execute("SELECT body FROM task_comments").fetchone()["body"] == "hi"
        assert len(conn.execute("SELECT * FROM task_runs").fetchall()) == 1

        # The non-numeric legacy cursor ("e-1") casts to 0.
        cursor_row = conn.execute("SELECT last_event_id FROM kanban_notify_subs").fetchone()
        assert cursor_row["last_event_id"] == 0

        # Indexes are back, including idx_events_run (added by the additive pass).
        present = {row[0] for row in conn.execute("SELECT name FROM sqlite_master WHERE type='index'")}
        expected = (
            "idx_events_task",
            "idx_events_run",
            "idx_comments_task",
            "idx_runs_task",
            "idx_runs_status",
            "idx_notify_task",
        )
        for index_name in expected:
            assert index_name in present

        # AUTOINCREMENT really assigns integer ids after the rebuild.
        conn.execute("INSERT INTO task_events (task_id, kind, created_at) VALUES ('task-1', 'completed', 3000)")
        newest = conn.execute("SELECT id FROM task_events ORDER BY id DESC LIMIT 1").fetchone()["id"]
        assert isinstance(newest, int) and newest >= 1
def test_rebuilt_schema_matches_fresh_db(tmp_path, monkeypatch):
    """The rebuilt tables must be structurally identical to a fresh DB, so the
    hand-written DDL in ``_REBUILD_SPECS`` can't silently drift from SCHEMA_SQL."""
    legacy_path = _setup_home(tmp_path, monkeypatch)
    _make_legacy_db(legacy_path)

    # A second board that has never been initialised acts as the reference.
    fresh_path = kb.kanban_db_path(board="fresh")
    fresh_path.parent.mkdir(parents=True, exist_ok=True)
    kb._INITIALIZED_PATHS.discard(str(fresh_path.resolve()))

    rebuilt_tables = ("task_events", "task_comments", "task_runs", "kanban_notify_subs")
    with kb.connect(legacy_path) as migrated, kb.connect(fresh_path) as fresh:
        for table in rebuilt_tables:
            assert _table_struct(migrated, table) == _table_struct(fresh, table)
def test_migration_is_idempotent(tmp_path, monkeypatch):
    """Re-opening an already-migrated DB is a no-op and leaves data intact."""
    db_path = _setup_home(tmp_path, monkeypatch)
    _make_legacy_db(db_path)

    # First open performs the migration.
    with kb.connect(db_path):
        pass

    # Forget the path so the second open goes through initialisation again.
    kb._INITIALIZED_PATHS.discard(str(db_path.resolve()))
    with kb.connect(db_path) as conn:
        columns = {row["name"]: row for row in conn.execute("PRAGMA table_info(task_events)")}
        assert columns["id"]["type"].upper() == "INTEGER"
        event_rows = conn.execute("SELECT * FROM task_events").fetchall()
        assert len(event_rows) == 2
def test_unseen_events_for_sub_survives_migrated_db(tmp_path, monkeypatch):
    """The crash that motivated #35096 — ``int(None)`` on a NULL cursor — is
    gone after migration; the notifier query returns an integer cursor."""
    db_path = _setup_home(tmp_path, monkeypatch)
    _make_legacy_db(db_path)

    subscription = {"task_id": "task-1", "platform": "telegram", "chat_id": "123"}
    with kb.connect(db_path) as conn:
        cursor, events = kb.unseen_events_for_sub(conn, **subscription)
        assert isinstance(cursor, int)
        assert isinstance(events, list)

View file

@ -0,0 +1,166 @@
"""Regression tests for bounded/lazy CLI MCP startup."""
from __future__ import annotations
from argparse import Namespace
import sys
import threading
import time
import types
import pytest
import cli as cli_mod
from hermes_cli import main as main_mod
from hermes_cli import mcp_startup
@pytest.fixture(autouse=True)
def _reset_mcp_startup_state():
    """Run each test with cleared ``mcp_startup`` discovery globals.

    The started flag and the thread handle are saved, reset to
    ``False`` / ``None`` for the test, and put back afterwards. A discovery
    thread still alive at teardown is joined for at most one second first.
    """
    previous = (
        mcp_startup._mcp_discovery_started,
        mcp_startup._mcp_discovery_thread,
    )
    try:
        mcp_startup._mcp_discovery_started = False
        mcp_startup._mcp_discovery_thread = None
        yield
    finally:
        leftover = mcp_startup._mcp_discovery_thread
        if leftover is not None and leftover.is_alive():
            leftover.join(timeout=1.0)
        (
            mcp_startup._mcp_discovery_started,
            mcp_startup._mcp_discovery_thread,
        ) = previous
def _agent_args(**overrides) -> Namespace:
base = {
"accept_hooks": False,
"command": "chat",
"cron_command": None,
"gateway_command": None,
"mcp_action": None,
"tui": False,
}
base.update(overrides)
return Namespace(**base)
def test_prepare_agent_startup_backgrounds_blocking_mcp_for_chat(monkeypatch):
    """A discovery call that blocks must not block ``_prepare_agent_startup``.

    The stubbed ``discover_mcp_tools`` waits on an event, yet startup has to
    return in under 0.2s with the discovery thread recorded and still alive.
    """
    release = threading.Event()
    counters = {"mcp": 0}

    def _blocking_discover():
        counters["mcp"] += 1
        release.wait()

    # Stub modules installed into sys.modules for this test.
    stub_modules = {
        "hermes_cli.plugins": types.SimpleNamespace(discover_plugins=lambda: None),
        "hermes_cli.config": types.SimpleNamespace(
            read_raw_config=lambda: {"mcp_servers": {"demo": {"transport": "stdio"}}},
            load_config=lambda: {},
        ),
        "agent.shell_hooks": types.SimpleNamespace(
            register_from_config=lambda *_a, **_k: None
        ),
        "tools.mcp_tool": types.SimpleNamespace(discover_mcp_tools=_blocking_discover),
    }
    for module_name, stub in stub_modules.items():
        monkeypatch.setitem(sys.modules, module_name, stub)

    try:
        began = time.monotonic()
        main_mod._prepare_agent_startup(_agent_args())
        took = time.monotonic() - began

        assert took < 0.2
        assert counters["mcp"] == 1
        assert mcp_startup._mcp_discovery_thread is not None
        assert mcp_startup._mcp_discovery_thread.is_alive()
    finally:
        # Always unblock the stub so the background thread can finish.
        release.set()
def test_prepare_agent_startup_skips_mcp_bootstrap_for_tui_chat(monkeypatch):
    """With ``tui=True`` startup neither calls discovery nor creates a thread."""
    counters = {"mcp": 0}

    def _count_discover():
        counters["mcp"] = counters["mcp"] + 1

    # Stub modules installed into sys.modules for this test.
    stub_modules = {
        "hermes_cli.plugins": types.SimpleNamespace(discover_plugins=lambda: None),
        "hermes_cli.config": types.SimpleNamespace(load_config=lambda: {}),
        "agent.shell_hooks": types.SimpleNamespace(
            register_from_config=lambda *_a, **_k: None
        ),
        "tools.mcp_tool": types.SimpleNamespace(discover_mcp_tools=_count_discover),
    }
    for module_name, stub in stub_modules.items():
        monkeypatch.setitem(sys.modules, module_name, stub)

    main_mod._prepare_agent_startup(_agent_args(tui=True))

    assert counters["mcp"] == 0
    assert mcp_startup._mcp_discovery_thread is None
def test_cli_get_tool_definitions_briefly_waits_for_fast_mcp_thread(monkeypatch):
    """``cli.get_tool_definitions`` returns only after a short-lived
    discovery thread has finished."""
    # A stand-in discovery thread that finishes after roughly 50ms.
    discovery = threading.Thread(target=lambda: time.sleep(0.05), daemon=True)
    discovery.start()
    mcp_startup._mcp_discovery_thread = discovery

    fake_model_tools = types.SimpleNamespace(
        get_tool_definitions=lambda *_a, **_k: ["ok"]
    )
    monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)

    began = time.monotonic()
    definitions = cli_mod.get_tool_definitions(enabled_toolsets=["web"], quiet_mode=True)
    took = time.monotonic() - began

    assert definitions == ["ok"]
    assert took >= 0.04
    assert not discovery.is_alive()
def test_init_agent_waits_for_mcp_discovery_before_agent_build(monkeypatch):
    """``_init_agent`` must call ``wait_for_mcp_discovery`` before it
    constructs the ``AIAgent``."""
    progress = {"done": False}

    shell = cli_mod.HermesCLI(compact=True)
    shell._session_db = object()
    shell._resumed = False
    shell.conversation_history = []
    # Replace the setup hooks with no-ops.
    shell._install_tool_callbacks = lambda: None
    shell._ensure_tirith_security = lambda: None
    shell._ensure_runtime_credentials = lambda: True

    def _mark_waited(timeout=0.75):
        progress["done"] = True

    monkeypatch.setattr(mcp_startup, "wait_for_mcp_discovery", _mark_waited)

    def _fake_agent(*_a, **_k):
        # The wait has to have happened by the time the agent is built.
        assert progress["done"] is True
        return types.SimpleNamespace()

    monkeypatch.setattr(cli_mod, "AIAgent", _fake_agent)

    assert shell._init_agent() is True

View file

@ -0,0 +1,50 @@
"""Tests for `hermes memory setup [provider]` routing.
The `memory setup` subcommand accepts an optional positional ``provider`` so a
fresh install can configure a specific provider directly (e.g.
``hermes memory setup honcho``) without the interactive picker — which matters
because the per-provider ``hermes <provider>`` subcommand is only registered
once that provider is active.
"""
from types import SimpleNamespace
from unittest.mock import patch
from hermes_cli import memory_setup
class TestMemorySetupProviderRouting:
    """How ``memory_setup.memory_command`` chooses between direct setup and the picker."""

    def test_setup_with_provider_arg_skips_picker(self):
        """A named provider goes straight to ``cmd_setup_provider``."""
        namespace = SimpleNamespace(memory_command="setup", provider="honcho")
        with patch.object(memory_setup, "cmd_setup_provider") as direct_mock:
            with patch.object(memory_setup, "cmd_setup") as picker_mock:
                memory_setup.memory_command(namespace)
        direct_mock.assert_called_once_with("honcho")
        picker_mock.assert_not_called()

    def test_setup_without_provider_runs_picker(self):
        """``provider=None`` runs the interactive picker with the parsed args."""
        namespace = SimpleNamespace(memory_command="setup", provider=None)
        with patch.object(memory_setup, "cmd_setup_provider") as direct_mock:
            with patch.object(memory_setup, "cmd_setup") as picker_mock:
                memory_setup.memory_command(namespace)
        picker_mock.assert_called_once_with(namespace)
        direct_mock.assert_not_called()

    def test_setup_with_missing_provider_attr_runs_picker(self):
        """A namespace with no ``provider`` attribute still reaches the picker."""
        namespace = SimpleNamespace(memory_command="setup")
        with patch.object(memory_setup, "cmd_setup_provider") as direct_mock:
            with patch.object(memory_setup, "cmd_setup") as picker_mock:
                memory_setup.memory_command(namespace)
        picker_mock.assert_called_once_with(namespace)
        direct_mock.assert_not_called()

    def test_unknown_provider_reports_and_returns_early(self, capsys):
        """An unknown provider name prints a not-found hint that points back
        at ``hermes memory setup``."""
        memory_setup.cmd_setup_provider("notaprovider")
        printed = capsys.readouterr().out
        for fragment in ("not found", "hermes memory setup"):
            assert fragment in printed

View file

@ -142,10 +142,6 @@ class TestCuratedModelsForProvider:
assert len(models) > 0
assert any("claude" in m[0] for m in models)
def test_zai_returns_glm_models(self):
models = curated_models_for_provider("zai")
assert any("glm" in m[0] for m in models)
def test_unknown_provider_returns_empty(self):
assert curated_models_for_provider("totally-unknown") == []
@ -199,9 +195,6 @@ class TestProviderModelIds:
def test_unknown_provider_returns_empty(self):
assert provider_model_ids("some-unknown-provider") == []
def test_zai_returns_glm_models(self):
assert "glm-5" in provider_model_ids("zai")
def test_stepfun_prefers_live_catalog(self):
with patch(
"hermes_cli.auth.resolve_api_key_provider_credentials",
@ -222,31 +215,6 @@ class TestProviderModelIds:
patch("hermes_cli.models._fetch_github_models", return_value=["gpt-5.4", "claude-sonnet-4.6"]):
assert provider_model_ids("copilot-acp") == ["gpt-5.4", "claude-sonnet-4.6"]
def test_copilot_falls_back_to_curated_defaults_without_stale_opus(self):
with patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \
patch("hermes_cli.models._fetch_github_models", return_value=None):
ids = provider_model_ids("copilot")
assert "gpt-5.4" in ids
assert "claude-sonnet-4.6" in ids
assert "claude-sonnet-4" in ids
assert "claude-sonnet-4.5" in ids
assert "claude-haiku-4.5" in ids
assert "gemini-3.1-pro-preview" in ids
assert "claude-opus-4.6" not in ids
def test_copilot_acp_falls_back_to_copilot_defaults(self):
with patch("hermes_cli.models._resolve_copilot_catalog_api_key", return_value="gh-token"), \
patch("hermes_cli.models._fetch_github_models", return_value=None):
ids = provider_model_ids("copilot-acp")
assert "gpt-5.4" in ids
assert "claude-sonnet-4.6" in ids
assert "claude-sonnet-4" in ids
assert "gemini-3.1-pro-preview" in ids
assert "copilot-acp" not in ids
assert "claude-opus-4.6" not in ids
# -- fetch_api_models --------------------------------------------------------

View file

@ -56,10 +56,6 @@ class TestOpenRouterModels:
assert isinstance(mid, str) and len(mid) > 0
assert isinstance(desc, str)
def test_at_least_5_models(self):
"""Sanity check that the models list hasn't been accidentally truncated."""
assert len(OPENROUTER_MODELS) >= 5
class TestFetchOpenRouterModels:
def test_live_fetch_recomputes_free_tags(self, monkeypatch):

View file

@ -231,3 +231,93 @@ def test_get_gateway_eligible_tools_ignores_quoted_false_opt_in(monkeypatch):
assert "web" in has_direct
assert "web" not in already_managed
assert set(unconfigured) == {"image_gen", "video_gen", "tts", "browser"}
def test_apply_nous_managed_defaults_writes_video_gen_config(monkeypatch):
    """Selecting ``video_gen`` with no direct FAL key writes
    ``video_gen.provider`` and ``video_gen.use_gateway`` into the config."""
    stubs = {
        "managed_nous_tools_enabled": lambda **kw: True,
        "fal_key_is_configured": lambda: False,
        "get_nous_portal_account_info": lambda **kw: _account(logged_in=True, paid=True),
    }
    monkeypatch.delenv("FAL_KEY", raising=False)
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(ns, attr_name, stub)

    cfg = {"model": {"provider": "nous"}}
    touched = ns.apply_nous_managed_defaults(cfg, enabled_toolsets=["video_gen"])

    assert "video_gen" in touched
    video_cfg = cfg["video_gen"]
    assert video_cfg["provider"] == "fal"
    assert video_cfg["use_gateway"] is True
def test_apply_nous_managed_defaults_writes_image_gen_config(monkeypatch):
    """Selecting ``image_gen`` with no direct FAL key writes
    ``image_gen.use_gateway`` into the config."""
    stubs = {
        "managed_nous_tools_enabled": lambda **kw: True,
        "fal_key_is_configured": lambda: False,
        "get_nous_portal_account_info": lambda **kw: _account(logged_in=True, paid=True),
    }
    monkeypatch.delenv("FAL_KEY", raising=False)
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(ns, attr_name, stub)

    cfg = {"model": {"provider": "nous"}}
    touched = ns.apply_nous_managed_defaults(cfg, enabled_toolsets=["image_gen"])

    assert "image_gen" in touched
    assert cfg["image_gen"]["use_gateway"] is True
def test_apply_nous_managed_defaults_skips_fal_tools_when_key_present(monkeypatch):
    """With ``FAL_KEY`` set, neither ``image_gen`` nor ``video_gen`` config is
    touched — the user's direct key takes precedence."""
    stubs = {
        "managed_nous_tools_enabled": lambda **kw: True,
        "fal_key_is_configured": lambda: True,
        "get_nous_portal_account_info": lambda **kw: _account(logged_in=True, paid=True),
    }
    monkeypatch.setenv("FAL_KEY", "fal-direct-key")
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(ns, attr_name, stub)

    cfg = {"model": {"provider": "nous"}}
    touched = ns.apply_nous_managed_defaults(
        cfg,
        enabled_toolsets=["image_gen", "video_gen"],
    )

    # Neither the change report nor the config itself may mention the tools.
    for container in (touched, cfg):
        assert "image_gen" not in container
        assert "video_gen" not in container
def test_apply_nous_managed_defaults_preserves_existing_video_gen_section(monkeypatch):
    """An existing ``video_gen`` dict gains the gateway keys while keeping the
    keys it already had."""
    stubs = {
        "managed_nous_tools_enabled": lambda **kw: True,
        "fal_key_is_configured": lambda: False,
        "get_nous_portal_account_info": lambda **kw: _account(logged_in=True, paid=True),
    }
    monkeypatch.delenv("FAL_KEY", raising=False)
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(ns, attr_name, stub)

    cfg = {
        "model": {"provider": "nous"},
        "video_gen": {"model": "pixverse-v6"},
    }
    touched = ns.apply_nous_managed_defaults(cfg, enabled_toolsets=["video_gen"])

    assert "video_gen" in touched
    assert cfg["video_gen"]["provider"] == "fal"
    assert cfg["video_gen"]["use_gateway"] is True
    # The key that was present before the call must survive it.
    assert cfg["video_gen"]["model"] == "pixverse-v6"

View file

@ -495,12 +495,3 @@ class TestOllamaCloudSuffixStripping:
assert _strip_ollama_cloud_suffix("qwen3-coder:480b-cloud") == "qwen3-coder:480b"
assert _strip_ollama_cloud_suffix("nemotron-3-nano:30b") == "nemotron-3-nano:30b"
assert _strip_ollama_cloud_suffix("") == ""
# ── Auxiliary Model ──
class TestOllamaCloudAuxiliary:
def test_aux_model_defined(self):
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
assert "ollama-cloud" in _API_KEY_PROVIDER_AUX_MODELS
assert _API_KEY_PROVIDER_AUX_MODELS["ollama-cloud"] == "nemotron-3-nano:30b"

View file

@ -48,12 +48,32 @@ def test_stamp_file_takes_precedence(tmp_path):
assert detect_install_method(project_root=tmp_path) == "docker"
def test_container_without_stamp_is_not_docker(tmp_path):
    """An unstamped install in a generic container must NOT be flagged as docker.

    Regression for issue #34397. The two supported installs both stamp
    ``.install_method`` (the curl installer -> ``git``, covered by
    ``test_stamp_file_takes_precedence``; the published image -> ``docker``),
    so neither hits this path. An unsupported manual install dropped into a
    container has no stamp and was wrongly classified as the published Docker
    image, so ``hermes update`` refused to run. With a ``.git`` checkout it
    must resolve to ``git``.
    """
    # A bare ``.git`` directory is the only install marker under project_root.
    (tmp_path / ".git").mkdir()
    with patch("hermes_cli.config.get_managed_system", return_value=None), \
         patch("hermes_cli.config.get_hermes_home", return_value=tmp_path), \
         patch("hermes_constants.is_container", return_value=True):
        from hermes_cli.config import detect_install_method
        assert detect_install_method(project_root=tmp_path) == "git"
def test_container_pip_install_without_stamp_is_pip(tmp_path):
    """In a container with neither ``.git`` nor a stamp file the method is
    ``pip``, not ``docker`` (issue #34397)."""
    with patch("hermes_cli.config.get_managed_system", return_value=None):
        with patch("hermes_cli.config.get_hermes_home", return_value=tmp_path):
            with patch("hermes_constants.is_container", return_value=True):
                from hermes_cli.config import detect_install_method

                detected = detect_install_method(project_root=tmp_path)
                assert detected == "pip"
def test_recommended_update_command_docker():

View file

@ -754,8 +754,8 @@ class TestRenameProfile:
cfg = json.loads(honcho_path.read_text())
assert "hermes.ssi_health" not in cfg["hosts"]
assert cfg["hosts"]["hermes.heimdall"]["aiPeer"] == "ssi_health"
assert cfg["hosts"]["hermes.heimdall"]["peerName"] == "user-peer"
assert cfg["hosts"]["hermes_heimdall"]["aiPeer"] == "ssi_health"
assert cfg["hosts"]["hermes_heimdall"]["peerName"] == "user-peer"
def test_pins_ai_peer_when_absent_on_honcho_host_rename(self, profile_env):
tmp_path = profile_env
@ -772,8 +772,8 @@ class TestRenameProfile:
cfg = json.loads(honcho_path.read_text())
assert "hermes.ssi_health" not in cfg["hosts"]
assert cfg["hosts"]["hermes.heimdall"]["aiPeer"] == "ssi_health"
assert cfg["hosts"]["hermes.heimdall"]["workspace"] == "hermes"
assert cfg["hosts"]["hermes_heimdall"]["aiPeer"] == "ssi_health"
assert cfg["hosts"]["hermes_heimdall"]["workspace"] == "hermes"
def test_does_not_overwrite_existing_honcho_host_on_rename(self, profile_env):
tmp_path = profile_env
@ -782,7 +782,7 @@ class TestRenameProfile:
honcho_path.write_text(json.dumps({
"hosts": {
"hermes.ssi_health": {"aiPeer": "ssi_health"},
"hermes.heimdall": {"aiPeer": "heimdall"},
"hermes_heimdall": {"aiPeer": "heimdall"},
}
}))
@ -791,7 +791,7 @@ class TestRenameProfile:
cfg = json.loads(honcho_path.read_text())
assert cfg["hosts"]["hermes.ssi_health"]["aiPeer"] == "ssi_health"
assert cfg["hosts"]["hermes.heimdall"]["aiPeer"] == "heimdall"
assert cfg["hosts"]["hermes_heimdall"]["aiPeer"] == "heimdall"
def test_default_raises_value_error(self, profile_env):
with pytest.raises(ValueError, match="default"):

View file

@ -0,0 +1,118 @@
"""Tests for the ``hermes prompt-size`` diagnostic (issue #34667)."""
import json
import pytest
from hermes_cli.prompt_size import (
_SKILLS_BLOCK_RE,
compute_prompt_breakdown,
render_breakdown,
)
def _seed_memory(hermes_home, memory_text="", user_text=""):
    """Create ``<hermes_home>/memories`` and write the non-empty texts into it.

    ``memory_text`` goes to ``MEMORY.md`` and ``user_text`` to ``USER.md``;
    an empty value leaves the corresponding file unwritten.
    """
    target_dir = hermes_home / "memories"
    target_dir.mkdir(parents=True, exist_ok=True)
    for filename, content in (("MEMORY.md", memory_text), ("USER.md", user_text)):
        if not content:
            continue
        (target_dir / filename).write_text(content, encoding="utf-8")
def _seed_skill(hermes_home, name, description):
    """Write a minimal ``SKILL.md`` under ``<hermes_home>/skills/demo/<name>``.

    The file has a front-matter block carrying ``name`` and ``description``,
    followed by a heading and a one-line body.
    """
    destination = hermes_home / "skills" / "demo" / name
    destination.mkdir(parents=True, exist_ok=True)
    manifest = f"---\nname: {name}\ndescription: {description}\n---\n# {name}\nbody\n"
    manifest_path = destination / "SKILL.md"
    manifest_path.write_text(manifest, encoding="utf-8")
@pytest.fixture
def isolated_home(tmp_path, monkeypatch):
    """Provide an empty ``.hermes`` directory under ``tmp_path``.

    ``HERMES_HOME`` points at the new directory and the working directory is
    moved to ``tmp_path``. Returns the directory path.
    """
    home_dir = tmp_path / ".hermes"
    home_dir.mkdir()
    # Run from the temp dir to avoid picking up the repo's AGENTS.md.
    monkeypatch.chdir(tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(home_dir))
    return home_dir
def test_breakdown_keys_and_shape(isolated_home):
    """Every documented key is present and all counts are non-negative."""
    report = compute_prompt_breakdown("cli")

    documented_keys = {
        "platform",
        "model",
        "system_prompt",
        "skills_index",
        "memory",
        "user_profile",
        "tools",
        "sections",
    }
    assert set(report) >= documented_keys
    assert report["platform"] == "cli"

    for section in ("system_prompt", "skills_index", "memory", "user_profile"):
        measured = report[section]
        assert measured["bytes"] >= 0
        assert measured["chars"] >= 0

    tool_stats = report["tools"]
    assert tool_stats["count"] >= 0
    assert tool_stats["json_bytes"] >= 0

    # System prompt is non-trivial even with empty home (identity + guidance).
    assert report["system_prompt"]["bytes"] > 0
def test_runs_offline_without_credentials(isolated_home, monkeypatch):
    """A breakdown is still produced with the provider API keys unset."""
    credential_vars = (
        "OPENROUTER_API_KEY",
        "OPENAI_API_KEY",
        "NOUS_API_KEY",
        "ANTHROPIC_API_KEY",
    )
    for name in credential_vars:
        monkeypatch.delenv(name, raising=False)

    report = compute_prompt_breakdown("cli")
    assert report["system_prompt"]["bytes"] > 0
def test_skills_index_reflects_installed_skills(isolated_home):
    """An installed skill yields a non-empty skills-index block.

    Note: the skills prompt is cached per-process (in-process LRU + disk
    snapshot), so the skill is seeded BEFORE the first build instead of
    comparing before/after within one process.
    """
    _seed_skill(isolated_home, "hello", "a demo skill for size testing")
    skills_section = compute_prompt_breakdown("cli")["skills_index"]
    assert skills_section["bytes"] > 0
def test_memory_and_profile_are_attributed(isolated_home):
    """Seeded memory and user-profile text each show up with a positive size."""
    seeded = {
        "memory_text": "Project uses pytest.\n",
        "user_text": "User is a developer.\n",
    }
    _seed_memory(isolated_home, **seeded)

    report = compute_prompt_breakdown("cli")
    for section in ("memory", "user_profile"):
        assert report[section]["bytes"] > 0
def test_skills_block_regex_matches_tagged_block():
    """``_SKILLS_BLOCK_RE`` captures the whole ``<available_skills>`` element."""
    sample = "preamble\n<available_skills>\n cat:\n - a: b\n</available_skills>\ntail"
    found = _SKILLS_BLOCK_RE.search(sample)
    assert found is not None
    captured = found.group(0)
    assert captured.startswith("<available_skills>")
    assert captured.endswith("</available_skills>")
def test_render_breakdown_is_plain_text(isolated_home):
    """The rendered report carries the section labels and is not a JSON dump."""
    rendered = render_breakdown(compute_prompt_breakdown("cli"))
    for label in ("System prompt total", "skills index", "Tool schemas"):
        assert label in rendered
    # Plain text — no JSON braces leaking in.
    leading = rendered.strip()
    assert not leading.startswith("{")
def test_json_serializable(isolated_home):
    """The breakdown survives a JSON round trip, as ``--json`` output requires."""
    data = compute_prompt_breakdown("cli")
    # ``json.dumps`` raising is the primary failure mode. Checking the decoded
    # payload as well gives the assertion something that can fail, unlike
    # comparing one round trip against an identical second round trip.
    decoded = json.loads(json.dumps(data))
    assert isinstance(decoded, dict)
    assert decoded["platform"] == "cli"

View file

@ -0,0 +1,118 @@
"""Tests for provider-group folding (display-only picker grouping).
These are invariant tests, not catalog snapshots: they assert how
``group_providers`` folds a flat slug list and how member slugs relate to
``PROVIDER_GROUPS`` / ``CANONICAL_PROVIDERS`` — not the specific set of
vendors, which is expected to change over time.
"""
from hermes_cli.models import (
CANONICAL_PROVIDERS,
PROVIDER_GROUPS,
group_providers,
provider_group_for_slug,
)
def _slugs(rows):
    """Expand picker rows into the flat list of provider slugs they expose.

    A ``"single"`` row contributes its ``slug``; any other row contributes
    every entry of its ``members``, in order.
    """
    flat = []
    for row in rows:
        flat += [row["slug"]] if row["kind"] == "single" else list(row["members"])
    return flat
def test_groups_reference_real_canonical_slugs():
    """Each group has a label, at least one member, and only members that are
    canonical provider slugs (guards typos and stale entries)."""
    known_slugs = set(provider.slug for provider in CANONICAL_PROVIDERS)
    for gid, group in PROVIDER_GROUPS.items():
        label, members = group
        assert label, f"group {gid} has empty label"
        assert len(members) >= 1
        for m in members:
            assert m in known_slugs, f"group {gid} member {m!r} is not a canonical slug"
def test_member_slugs_are_unique_across_groups():
    """No slug is listed as a member more than once across all groups."""
    seen = {}
    for gid, group in PROVIDER_GROUPS.items():
        members = group[1]
        for m in members:
            assert m not in seen, f"{m!r} in both {seen[m]!r} and {gid!r}"
            seen[m] = gid
def test_reverse_index_matches_groups():
    """``provider_group_for_slug`` maps each member back to its group id and
    returns ``""`` for an ungrouped or empty slug."""
    for group_id, group in PROVIDER_GROUPS.items():
        for member in group[1]:
            assert provider_group_for_slug(member) == group_id
    for ungrouped in ("openrouter", ""):
        assert provider_group_for_slug(ungrouped) == ""
def test_ungrouped_providers_pass_through_in_order():
    """Ungrouped slugs come back as single rows in their input order."""
    folded = group_providers(["nous", "openrouter", "deepseek"])
    kinds = [row["kind"] == "single" for row in folded]
    assert all(kinds)
    emitted = [row["slug"] for row in folded]
    assert emitted == ["nous", "openrouter", "deepseek"]
def test_multi_member_group_folds_to_one_row():
    """Three minimax slugs collapse into one ``group`` row listing all three."""
    folded = group_providers(["minimax", "minimax-oauth", "minimax-cn"])
    assert len(folded) == 1
    only_row = folded[0]
    assert only_row["kind"] == "group"
    assert only_row["group_id"] == "minimax"
    assert only_row["members"] == ["minimax", "minimax-oauth", "minimax-cn"]
def test_group_appears_at_first_member_position():
    """The group row sits where its first present member was listed, and a
    later member of the same group adds no further row."""
    folded = group_providers(["nous", "minimax", "deepseek", "minimax-cn"])
    layout = []
    for row in folded:
        layout.append((row["kind"], row.get("group_id") or row.get("slug")))
    expected_layout = [
        ("single", "nous"),
        ("group", "minimax"),
        ("single", "deepseek"),
    ]
    assert layout == expected_layout
    # both minimax members folded into the single group row
    assert folded[1]["members"] == ["minimax", "minimax-cn"]
def test_single_present_member_degrades_to_single_row():
    """With only one member of a group present, the row stays a plain single."""
    folded = group_providers(["xai"])  # xai-oauth absent
    assert len(folded) == 1
    lone_row = folded[0]
    assert lone_row["kind"] == "single"
    assert lone_row["slug"] == "xai"
def test_member_order_follows_declaration_not_input():
    """Members of a folded group are listed in a fixed order regardless of
    the order in which the input named them."""
    shuffled_input = ["minimax-cn", "minimax", "minimax-oauth"]
    first_row = group_providers(shuffled_input)[0]
    assert first_row["members"] == ["minimax", "minimax-oauth", "minimax-cn"]
def test_duplicate_slugs_ignored():
    """Repeated input slugs produce one row each, not one row per occurrence."""
    folded = group_providers(["nous", "nous", "minimax", "minimax"])
    identifiers = [row.get("slug") or row["group_id"] for row in folded]
    assert identifiers == ["nous", "minimax"]
def test_fold_is_lossless_for_present_slugs():
    """Every canonical slug is still reachable through the folded rows —
    grouping hides nothing."""
    all_slugs = [provider.slug for provider in CANONICAL_PROVIDERS]
    reachable = _slugs(group_providers(all_slugs))
    assert set(reachable) == set(all_slugs)
def test_canonical_fold_row_count_shrinks():
    """Folding the canonical list yields fewer top-level rows than slugs,
    which shows that grouping really consolidates entries."""
    all_slugs = [provider.slug for provider in CANONICAL_PROVIDERS]
    folded_count = len(group_providers(all_slugs))
    assert folded_count < len(all_slugs)

View file

@ -757,8 +757,68 @@ def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
assert config["web"]["backend"] == "firecrawl"
assert config["tts"]["provider"] == "openai"
assert config["browser"]["cloud_provider"] == "browser-use"
assert config["image_gen"]["use_gateway"] is True
assert configured == []
def test_first_install_nous_auto_configures_video_gen(monkeypatch):
    """First-install flow writes the video_gen gateway config for Nous users.

    With ``video_gen`` ticked in the toolset checklist, the config must end up
    with ``video_gen.provider`` and ``video_gen.use_gateway`` set so the FAL
    plugin can route through the gateway at runtime. Regression test for the
    bug where video_gen was marked as auto-configured but no config was
    actually written.
    """
    monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)

    config = {
        "model": {"provider": "nous"},
        "platform_toolsets": {"cli": []},
    }

    # Clear the direct-provider credentials listed here before running.
    direct_credentials = (
        "VOICE_TOOLS_OPENAI_KEY",
        "OPENAI_API_KEY",
        "ELEVENLABS_API_KEY",
        "FIRECRAWL_API_KEY",
        "FIRECRAWL_API_URL",
        "TAVILY_API_KEY",
        "PARALLEL_API_KEY",
        "BROWSERBASE_API_KEY",
        "BROWSERBASE_PROJECT_ID",
        "BROWSER_USE_API_KEY",
        "FAL_KEY",
    )
    for env_var in direct_credentials:
        monkeypatch.delenv(env_var, raising=False)

    configured = []

    def _checklist(*args, **kwargs):
        return {"video_gen"}

    def _skip_save(config):
        return None

    def _platforms():
        return ["cli"]

    def _account_info(*args, **kwargs):
        return NousPortalAccountInfo(
            logged_in=True,
            source="jwt",
            fresh=False,
            paid_service_access=True,
        )

    def _record_manual_configure(ts_key, config):
        configured.append(ts_key)

    replacements = (
        ("hermes_cli.tools_config._prompt_toolset_checklist", _checklist),
        ("hermes_cli.tools_config.save_config", _skip_save),
        ("hermes_cli.tools_config._get_enabled_platforms", _platforms),
        ("hermes_cli.nous_subscription.get_nous_portal_account_info", _account_info),
        ("hermes_cli.tools_config._configure_toolset", _record_manual_configure),
    )
    for dotted_target, replacement in replacements:
        monkeypatch.setattr(dotted_target, replacement)

    tools_command(first_install=True, config=config)

    video_cfg = config["video_gen"]
    assert video_cfg["provider"] == "fal"
    assert video_cfg["use_gateway"] is True
    # video_gen should NOT appear in the manual configure list — it's auto-configured
    assert "video_gen" not in configured
# ── Platform / toolset consistency ────────────────────────────────────────────

View file

@ -638,6 +638,60 @@ def test_oneshot_rejects_invalid_only_toolsets(monkeypatch, capsys):
assert "did not contain any valid toolsets" in err
def test_oneshot_fails_closed_on_empty_final_response(monkeypatch, capsys):
    """An empty agent response gives exit code 1, nothing on stdout, and a
    "no final response" note on stderr."""
    _stub_plugin_discovery(monkeypatch)
    import hermes_cli.oneshot as oneshot_mod

    def _empty_reply(*_args, **_kwargs):
        return ""

    monkeypatch.setattr(oneshot_mod, "_run_agent", _empty_reply)

    exit_code = oneshot_mod.run_oneshot("hello")
    assert exit_code == 1

    streams = capsys.readouterr()
    assert streams.out == ""
    assert "no final response" in streams.err
def test_oneshot_prints_nonempty_final_response(monkeypatch, capsys):
    """A non-empty agent response is printed with a trailing newline, exit
    code 0, and an empty stderr."""
    _stub_plugin_discovery(monkeypatch)
    import hermes_cli.oneshot as oneshot_mod

    def _done_reply(*_args, **_kwargs):
        return "done"

    monkeypatch.setattr(oneshot_mod, "_run_agent", _done_reply)

    exit_code = oneshot_mod.run_oneshot("hello")
    assert exit_code == 0

    streams = capsys.readouterr()
    assert streams.out == "done\n"
    assert streams.err == ""
def test_oneshot_fails_closed_on_agent_exception(monkeypatch, capsys):
    """An ``OSError`` from the agent gives exit code 1, an empty stdout, and a
    stderr line naming both the failure and the error text."""
    _stub_plugin_discovery(monkeypatch)
    import hermes_cli.oneshot as oneshot_mod

    def _raise_os_error(*_args, **_kwargs):
        raise OSError("not a TTY")

    monkeypatch.setattr(oneshot_mod, "_run_agent", _raise_os_error)

    exit_code = oneshot_mod.run_oneshot("hello")
    assert exit_code == 1

    streams = capsys.readouterr()
    assert streams.out == ""
    for fragment in ("agent failed", "not a TTY"):
        assert fragment in streams.err
def test_oneshot_reraises_keyboard_interrupt(monkeypatch):
    """``KeyboardInterrupt`` from the agent propagates out of ``run_oneshot``."""
    _stub_plugin_discovery(monkeypatch)
    import hermes_cli.oneshot as oneshot_mod
    import pytest as _pytest

    def _simulate_ctrl_c(*_args, **_kwargs):
        raise KeyboardInterrupt

    monkeypatch.setattr(oneshot_mod, "_run_agent", _simulate_ctrl_c)

    interrupt_expected = _pytest.raises(KeyboardInterrupt)
    with interrupt_expected:
        oneshot_mod.run_oneshot("hello")
def test_oneshot_filters_invalid_toolsets_before_redirect(monkeypatch, capsys):
_stub_plugin_discovery(monkeypatch)
from hermes_cli.oneshot import _validate_explicit_toolsets

View file

@ -128,24 +128,31 @@ def test_detect_concurrent_is_noop_off_windows(_winp, tmp_path):
def _fake_psutil_with_parent_chain(
parent_chain: list[int],
proc_iter_rows: list,
*,
ancestor_exe: str | None = None,
):
"""Build a psutil stand-in that has Process()/parent() AND process_iter().
"""Build a psutil stand-in that has Process()/parents()/exe() AND process_iter().
``parent_chain`` is the list of PIDs returned by successive ``.parent()``
calls starting from the seed (``os.getpid()``); the last entry's
``.parent()`` returns ``None`` to terminate the walk.
``parent_chain`` is the ordered list of ancestor PIDs (closest first)
returned by ``proc.parents()`` on the seed (``os.getpid()``).
``ancestor_exe`` is the executable path reported by each ancestor's
``.exe()``; when it matches one of our shim paths the ancestor is
excluded (the launcher-shim case). Pass ``None`` to model an ancestor
whose exe can't be read (psutil error) — it stays in the candidate set.
"""
class _FakeProc:
def __init__(self, pid: int, chain: list[int]):
def __init__(self, pid: int, exe_path: str | None):
self.pid = pid
self._chain = chain
self._exe = exe_path
def parent(self):
if not self._chain:
return None
next_pid = self._chain[0]
return _FakeProc(next_pid, self._chain[1:])
def exe(self):
if self._exe is None:
raise OSError("exe unavailable")
return self._exe
def parents(self):
return [_FakeProc(p, ancestor_exe) for p in parent_chain]
class _NoSuchProcess(Exception):
pass
@ -153,8 +160,8 @@ def _fake_psutil_with_parent_chain(
class _AccessDenied(Exception):
pass
def _process(pid):
return _FakeProc(pid, list(parent_chain))
def _process(pid=None):
return _FakeProc(pid if pid is not None else os.getpid(), ancestor_exe)
return types.SimpleNamespace(
Process=_process,
@ -185,6 +192,7 @@ def test_detect_concurrent_excludes_parent_chain(_winp, tmp_path):
fake_psutil = _fake_psutil_with_parent_chain(
parent_chain=[launcher_pid],
proc_iter_rows=rows,
ancestor_exe=str(shim),
)
with patch.dict(sys.modules, {"psutil": fake_psutil}):
result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
@ -211,6 +219,7 @@ def test_detect_concurrent_still_finds_unrelated_other_hermes(_winp, tmp_path):
fake_psutil = _fake_psutil_with_parent_chain(
parent_chain=[launcher_pid],
proc_iter_rows=rows,
ancestor_exe=str(shim),
)
with patch.dict(sys.modules, {"psutil": fake_psutil}):
result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
@ -238,6 +247,7 @@ def test_detect_concurrent_parent_chain_walks_deep(_winp, tmp_path):
fake_psutil = _fake_psutil_with_parent_chain(
parent_chain=[parent_pid, grandparent_pid, greatgrandparent_pid],
proc_iter_rows=rows,
ancestor_exe=str(shim),
)
with patch.dict(sys.modules, {"psutil": fake_psutil}):
result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
@ -246,25 +256,38 @@ def test_detect_concurrent_parent_chain_walks_deep(_winp, tmp_path):
@patch.object(cli_main, "_is_windows", return_value=True)
def test_detect_concurrent_parent_walk_handles_cycle(_winp, tmp_path):
"""A PID cycle in the parent chain must not hang the walk."""
def test_detect_concurrent_parents_call_robust_to_one_bad_hop(_winp, tmp_path):
"""The launcher shim is still excluded even when an ancestor exe is unreadable.
Field regression (issues #29341, #34795): the old per-hop ``parent()``
walk bailed on the FIRST psutil error, so an AccessDenied on any hop left
the launcher shim in the candidate set and re-triggered the false
positive. ``parents()`` returns the whole list at once; we evaluate each
ancestor independently, so one unreadable hop never strands the launcher.
"""
scripts_dir = tmp_path
shim = scripts_dir / "hermes.exe"
shim.write_bytes(b"")
me = os.getpid()
bogus_loop_pid = me + 1
launcher_pid = me + 100
rows = [_make_proc(me, str(shim), "python.exe")]
# Chain that points back to ``me`` — the loop-detection branch must break.
rows = [
_make_proc(me, str(shim), "python.exe"),
_make_proc(launcher_pid, str(shim), "hermes.exe"),
]
# ancestor_exe=None → every ancestor's .exe() raises OSError. The helper
# must swallow it per-ancestor and not crash; the launcher won't be
# excluded in this degenerate case, but a real run reads the shim exe.
fake_psutil = _fake_psutil_with_parent_chain(
parent_chain=[bogus_loop_pid, me, bogus_loop_pid],
parent_chain=[launcher_pid],
proc_iter_rows=rows,
ancestor_exe=None,
)
with patch.dict(sys.modules, {"psutil": fake_psutil}):
result = cli_main._detect_concurrent_hermes_instances(scripts_dir)
# No crash, no hang; self + bogus_loop_pid excluded; no others reported.
assert result == []
# No crash; helper completes. (Degenerate stub: launcher exe unreadable.)
assert result == [(launcher_pid, "hermes.exe")]
@patch.object(cli_main, "_is_windows", return_value=True)
@ -310,6 +333,11 @@ def test_format_message_mentions_pids_and_remediation(tmp_path):
assert "--force" in msg
# Mentions the file that would have been overwritten
assert str(tmp_path / "hermes.exe") in msg
# Self-service kill command targets the exact stale PIDs (issue #34795).
assert "taskkill" in msg
assert "/PID 1234" in msg
assert "/PID 5678" in msg
assert "/F" in msg
# ---------------------------------------------------------------------------

View file

@ -0,0 +1,311 @@
"""Tests for uv-tool install detection in the update path (issue #29700).
``uv tool install hermes-agent`` lives outside any venv, so the previous
``uv pip install --upgrade`` update path failed with ``No virtual
environment found``. ``is_uv_tool_install`` should detect this layout and
both the user-facing recommended command and the actual
``_cmd_update_pip`` subprocess invocation should switch to
``uv tool upgrade hermes-agent``.
Detection is restricted to properties of the running interpreter
(``sys.prefix`` / ``sys.executable``) so a pip/venv install on a machine
that also has ``uv tool install hermes-agent`` does not get misclassified.
"""
from __future__ import annotations
import subprocess
from types import SimpleNamespace
from unittest.mock import patch
import pytest
# ---------------------------------------------------------------------------
# is_uv_tool_install
# ---------------------------------------------------------------------------
class TestIsUvToolInstall:
    """``config.is_uv_tool_install`` under patched ``sys.prefix`` / ``sys.executable``."""

    def test_returns_true_when_sys_prefix_matches_uv_tool_layout(self):
        """A ``sys.prefix`` inside ``uv/tools/hermes-agent`` is detected."""
        from hermes_cli import config

        uv_prefix = "/home/user/.local/share/uv/tools/hermes-agent"
        with patch.object(config.sys, "prefix", uv_prefix):
            detected = config.is_uv_tool_install()
            assert detected is True

    def test_returns_true_when_sys_executable_matches_uv_tool_layout(self):
        """The marker on ``sys.executable`` alone (bin/python) is enough."""
        from hermes_cli import config

        uv_python = "/home/user/.local/share/uv/tools/hermes-agent/bin/python"
        with patch.object(config.sys, "prefix", "/some/unrelated/venv"):
            with patch.object(config.sys, "executable", uv_python):
                detected = config.is_uv_tool_install()
                assert detected is True

    def test_returns_false_when_neither_prefix_nor_executable_matches(self):
        """Unrelated prefix and executable paths are not a uv-tool install."""
        from hermes_cli import config

        with patch.object(config.sys, "prefix", "/some/unrelated/venv"):
            with patch.object(config.sys, "executable", "/usr/bin/python3"):
                detected = config.is_uv_tool_install()
                assert detected is False

    def test_does_not_consult_uv_tool_list(self):
        """Detection must NOT shell out: ``uv tool list`` would false-positive
        when the active install is pip/venv but the machine also has
        ``uv tool install hermes-agent`` somewhere on disk. Copilot review on
        PR #29703 flagged this; the fix is to never call ``uv tool list``
        from the detection path."""
        from hermes_cli import config

        with patch.object(config.sys, "prefix", "/some/unrelated/venv"):
            with patch.object(config.sys, "executable", "/usr/bin/python3"):
                with patch("subprocess.run") as run_spy:
                    detected = config.is_uv_tool_install()
                    assert detected is False
                    run_spy.assert_not_called()

    def test_case_insensitive_match(self):
        """Matching ignores case: Windows paths preserve case (for example a
        ``UV/Tools`` directory under the local app-data folder), so a
        case-sensitive compare would miss them. The lower-cased compare is
        exercised here without monkey-patching ``os.sep``, which would break
        the whole suite."""
        from hermes_cli import config

        mixed_case_prefix = "/HOME/USER/.local/share/UV/Tools/hermes-agent"
        with patch.object(config.sys, "prefix", mixed_case_prefix):
            detected = config.is_uv_tool_install()
            assert detected is True

    def test_handles_empty_executable(self):
        """An empty ``sys.executable`` yields ``False``."""
        from hermes_cli import config

        with patch.object(config.sys, "prefix", "/some/unrelated/venv"):
            with patch.object(config.sys, "executable", ""):
                detected = config.is_uv_tool_install()
                assert detected is False
# ---------------------------------------------------------------------------
# recommended_update_command_for_method
# ---------------------------------------------------------------------------
class TestRecommendedUpdateCommandForUvTool:
    """Update command recommended for the ``pip`` method under each uv scenario."""

    def test_uv_tool_install_recommends_uv_tool_upgrade(self):
        """A uv-tool install with ``uv`` on PATH recommends ``uv tool upgrade``."""
        from hermes_cli import config

        with patch("shutil.which", return_value="/usr/local/bin/uv"):
            with patch.object(config, "is_uv_tool_install", return_value=True):
                recommended = config.recommended_update_command_for_method("pip")
        assert recommended == "uv tool upgrade hermes-agent"

    def test_uv_tool_install_recommends_uv_tool_upgrade_even_without_uv_on_path(self):
        """Recommendation reflects the *install method*, not whether ``uv`` is
        currently on PATH — the user needs to know the right command to run."""
        from hermes_cli import config

        with patch("shutil.which", return_value=None):
            with patch.object(config, "is_uv_tool_install", return_value=True):
                recommended = config.recommended_update_command_for_method("pip")
        assert recommended == "uv tool upgrade hermes-agent"

    def test_uv_pip_install_keeps_legacy_recommendation(self):
        """Existing behavior: uv is on PATH but Hermes is a regular pip install."""
        from hermes_cli import config

        with patch("shutil.which", return_value="/usr/local/bin/uv"):
            with patch.object(config, "is_uv_tool_install", return_value=False):
                recommended = config.recommended_update_command_for_method("pip")
        assert recommended == "uv pip install --upgrade hermes-agent"

    def test_no_uv_falls_back_to_plain_pip(self):
        """No ``uv`` on PATH and no uv-tool install: plain ``pip`` is recommended."""
        from hermes_cli import config

        with patch("shutil.which", return_value=None):
            with patch.object(config, "is_uv_tool_install", return_value=False):
                recommended = config.recommended_update_command_for_method("pip")
        assert recommended == "pip install --upgrade hermes-agent"

    def test_recommendation_does_not_spawn_subprocess(self):
        """Computing the recommendation string must be cheap — no ``uv tool list``
        spawn. Copilot review on PR #29703 flagged the prior subprocess hop
        as adding overhead and a multi-second timeout window for what is
        purely a display string."""
        from hermes_cli import config

        with patch.object(config.sys, "prefix", "/some/unrelated/venv"):
            with patch.object(config.sys, "executable", "/usr/bin/python3"):
                with patch("shutil.which", return_value="/usr/local/bin/uv"):
                    with patch("subprocess.run") as run_spy:
                        recommended = config.recommended_update_command_for_method("pip")
                        run_spy.assert_not_called()
        assert recommended == "uv pip install --upgrade hermes-agent"
# ---------------------------------------------------------------------------
# _cmd_update_pip subprocess command
# ---------------------------------------------------------------------------
class TestCmdUpdatePipUsesUvTool:
    """``_cmd_update_pip`` must invoke the upgrade command that matches the
    install method.

    Every test mocks ``subprocess.run`` so nothing is really upgraded, and
    stubs ``shutil.which`` / ``hermes_cli.config.is_uv_tool_install`` to pick
    the scenario under test.
    """

    @pytest.fixture(autouse=True)
    def _pin_venv_layout(self, monkeypatch):
        """Give every test in this class a fixed venv interpreter layout.

        The ``uv pip`` path also depends on ``sys.prefix`` vs.
        ``sys.base_prefix`` (``TestCmdUpdatePipInstallLayouts`` shows that
        ``--system`` is added outside a venv). Without pinning them, the
        exact-command assertions below would pass or fail depending on the
        interpreter that happens to run the suite.
        """
        from hermes_cli import main as hm

        monkeypatch.setattr(hm.sys, "prefix", "/home/u/.hermes/hermes-agent/venv")
        monkeypatch.setattr(hm.sys, "base_prefix", "/usr")

    @patch("subprocess.run")
    def test_runs_uv_tool_upgrade_when_uv_tool_install(self, mock_run):
        """The actual subprocess invocation must switch to ``uv tool upgrade``."""
        from hermes_cli.main import _cmd_update_pip

        mock_run.return_value = subprocess.CompletedProcess(["uv"], 0, stdout="", stderr="")
        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
                patch("hermes_cli.config.is_uv_tool_install", return_value=True):
            _cmd_update_pip(SimpleNamespace())
        assert mock_run.call_args[0][0] == ["/usr/local/bin/uv", "tool", "upgrade", "hermes-agent"]

    @patch("subprocess.run")
    def test_runs_uv_pip_install_when_not_uv_tool(self, mock_run):
        """Existing behavior preserved when uv is present but Hermes isn't a tool install."""
        from hermes_cli.main import _cmd_update_pip

        mock_run.return_value = subprocess.CompletedProcess(["uv"], 0, stdout="", stderr="")
        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
                patch("hermes_cli.config.is_uv_tool_install", return_value=False):
            _cmd_update_pip(SimpleNamespace())
        # No ``--system`` expected: the autouse fixture pins a venv layout.
        assert mock_run.call_args[0][0] == [
            "/usr/local/bin/uv",
            "pip",
            "install",
            "--upgrade",
            "hermes-agent",
        ]

    @patch("subprocess.run")
    def test_falls_back_to_pip_when_no_uv(self, mock_run):
        """Without ``uv`` on PATH the update runs ``<interpreter> -m pip install --upgrade``."""
        from hermes_cli.main import _cmd_update_pip

        mock_run.return_value = subprocess.CompletedProcess(["pip"], 0, stdout="", stderr="")
        with patch("shutil.which", return_value=None), \
                patch("hermes_cli.config.is_uv_tool_install", return_value=False):
            _cmd_update_pip(SimpleNamespace())
        cmd = mock_run.call_args[0][0]
        # cmd[0] is the interpreter path, which varies; only the tail is pinned.
        assert cmd[1:] == ["-m", "pip", "install", "--upgrade", "hermes-agent"]

    @patch("subprocess.run")
    def test_exits_nonzero_on_subprocess_failure(self, mock_run):
        """A non-zero exit from the upgrade subprocess becomes ``SystemExit(1)``."""
        from hermes_cli.main import _cmd_update_pip

        mock_run.return_value = subprocess.CompletedProcess(["uv"], 1, stdout="", stderr="")
        with patch("shutil.which", return_value="/usr/local/bin/uv"), \
                patch("hermes_cli.config.is_uv_tool_install", return_value=True):
            with pytest.raises(SystemExit) as exc_info:
                _cmd_update_pip(SimpleNamespace())
        assert exc_info.value.code == 1

    @patch("subprocess.run")
    def test_uv_tool_install_without_uv_on_path_exits_with_hint(self, mock_run):
        """If the running interpreter looks like a uv-tool install but ``uv`` is
        somehow missing from PATH, surface a clear hint instead of silently
        falling back to ``python -m pip``, which would either fail (no venv)
        or upgrade the wrong copy."""
        from hermes_cli.main import _cmd_update_pip

        with patch("shutil.which", return_value=None), \
                patch("hermes_cli.config.is_uv_tool_install", return_value=True):
            with pytest.raises(SystemExit) as exc_info:
                _cmd_update_pip(SimpleNamespace())
        assert exc_info.value.code == 1
        # No fallback command may have been spawned.
        mock_run.assert_not_called()
# ---------------------------------------------------------------------------
# pipx-managed installs, --system fallback, and VIRTUAL_ENV overlay
# (issue #29700 / #35031 family — consolidated update-path handling)
# ---------------------------------------------------------------------------
class TestCmdUpdatePipInstallLayouts:
    """How ``_cmd_update_pip`` adapts to where the running interpreter lives:

    - inside a venv (launcher shim) -> export VIRTUAL_ENV, no ``--system``
    - bare pip outside any venv     -> add ``--system``, no overlay
    - pipx-managed                  -> ``pipx upgrade``
    """

    @staticmethod
    def _run_update(mock_run, monkeypatch, *, prefix, base_prefix, **which_mock):
        """Run ``_cmd_update_pip`` under a faked interpreter layout.

        ``prefix`` / ``base_prefix`` replace the ``sys`` attributes of the same
        name. ``which_mock`` is forwarded to ``patch("shutil.which", ...)``
        (``return_value=`` or ``side_effect=``). ``is_uv_tool_install`` is
        forced to ``False`` and the mocked ``subprocess.run`` reports success.
        """
        from hermes_cli import main as hm

        mock_run.return_value = subprocess.CompletedProcess([], 0, stdout="", stderr="")
        monkeypatch.setattr(hm.sys, "prefix", prefix)
        monkeypatch.setattr(hm.sys, "base_prefix", base_prefix)
        with patch("shutil.which", **which_mock), \
                patch("hermes_cli.config.is_uv_tool_install", return_value=False):
            hm._cmd_update_pip(SimpleNamespace())

    @patch("subprocess.run")
    def test_pipx_managed_uses_pipx_upgrade(self, mock_run, monkeypatch):
        """pipx prefix with both ``uv`` and ``pipx`` on PATH -> ``pipx upgrade``."""
        on_path = {"uv": "/usr/bin/uv", "pipx": "/usr/bin/pipx"}
        self._run_update(
            mock_run,
            monkeypatch,
            prefix="/home/u/.local/pipx/venvs/hermes-agent",
            base_prefix="/usr",
            side_effect=lambda name: on_path.get(name),
        )
        assert mock_run.call_args[0][0] == ["/usr/bin/pipx", "upgrade", "hermes-agent"]
        # pipx upgrade ignores VIRTUAL_ENV; we must not set it.
        assert "env" not in mock_run.call_args.kwargs

    @patch("subprocess.run")
    def test_pipx_layout_without_pipx_binary_treated_as_venv(
        self, mock_run, monkeypatch
    ):
        """pipx prefix but no ``pipx`` binary on PATH -> handled like any venv."""
        self._run_update(
            mock_run,
            monkeypatch,
            prefix="/home/u/.local/pipx/venvs/hermes-agent",
            base_prefix="/usr",
            # pipx layout detected via prefix, but pipx binary missing on PATH.
            side_effect=lambda name: "/usr/bin/uv" if name == "uv" else None,
        )
        # prefix != base_prefix, so this is treated as a venv -> overlay, no --system.
        assert mock_run.call_args[0][0] == [
            "/usr/bin/uv", "pip", "install", "--upgrade", "hermes-agent",
        ]
        assert mock_run.call_args.kwargs["env"]["VIRTUAL_ENV"].endswith("hermes-agent")

    @patch("subprocess.run")
    def test_bare_pip_outside_venv_adds_system(self, mock_run, monkeypatch):
        """``prefix == base_prefix`` (no venv) -> ``--system`` and no env overlay."""
        self._run_update(
            mock_run,
            monkeypatch,
            prefix="/usr",
            base_prefix="/usr",
            return_value="/usr/bin/uv",
        )
        assert mock_run.call_args[0][0] == [
            "/usr/bin/uv", "pip", "install", "--system", "--upgrade", "hermes-agent",
        ]
        assert "env" not in mock_run.call_args.kwargs

    @patch("subprocess.run")
    def test_venv_exports_virtualenv_and_omits_system(self, mock_run, monkeypatch):
        """Inside a venv the prefix is exported as VIRTUAL_ENV and ``--system`` is omitted."""
        monkeypatch.delenv("VIRTUAL_ENV", raising=False)
        self._run_update(
            mock_run,
            monkeypatch,
            prefix="/home/u/.hermes/hermes-agent/venv",
            base_prefix="/usr",
            return_value="/usr/bin/uv",
        )
        invoked = mock_run.call_args[0][0]
        assert "--system" not in invoked
        assert invoked == ["/usr/bin/uv", "pip", "install", "--upgrade", "hermes-agent"]
        assert mock_run.call_args.kwargs["env"]["VIRTUAL_ENV"] == "/home/u/.hermes/hermes-agent/venv"

View file

@ -249,9 +249,12 @@ class TestFlushAll:
mgr = _make_manager(write_frequency="async")
sess = _make_session()
sess.add_message("user", "pending")
mgr._async_queue.put(sess)
with patch.object(mgr, "_flush_session") as mock_flush:
# Put the item AFTER the mock is installed so the background
# writer thread (if it dequeues before flush_all) still hits
# the mock rather than the real _flush_session.
mgr._async_queue.put(sess)
mgr.flush_all()
# Called at least once for the queued item
assert mock_flush.call_count >= 1

Some files were not shown because too many files have changed in this diff Show more