Merge remote-tracking branch 'origin/main' into sid/types-and-lints

# Conflicts:
#	gateway/platforms/base.py
#	gateway/platforms/qqbot/adapter.py
#	gateway/platforms/slack.py
#	hermes_cli/main.py
#	scripts/batch_runner.py
#	tools/skills_tool.py
#	uv.lock
This commit is contained in:
alt-glitch 2026-04-21 20:28:45 +05:30
commit a9ed7cb3b4
117 changed files with 7791 additions and 611 deletions

View file

@ -566,3 +566,52 @@ python -m pytest tests/ -q -n 4
Worker count above 4 will surface test-ordering flakes that CI never sees.
Always run the full suite before pushing changes.
### Don't write change-detector tests
A test is a **change-detector** if it fails whenever data that is **expected
to change** gets updated — model catalogs, config version numbers,
enumeration counts, hardcoded lists of provider models. These tests add no
behavioral coverage; they just guarantee that routine source updates break
CI and cost engineering time to "fix."
**Do not write:**
```python
# catalog snapshot — breaks every model release
assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
assert "MiniMax-M2.7" in models
# config version literal — breaks every schema bump
assert DEFAULT_CONFIG["_config_version"] == 21
# enumeration count — breaks every time a skill/provider is added
assert len(_PROVIDER_MODELS["huggingface"]) == 8
```
**Do write:**
```python
# behavior: does the catalog plumbing work at all?
assert "gemini" in _PROVIDER_MODELS
assert len(_PROVIDER_MODELS["gemini"]) >= 1
# behavior: does migration bump the user's version to current latest?
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
# invariant: no plan-only model leaks into the legacy list
assert not (set(moonshot_models) & coding_plan_only_models)
# invariant: every model in the catalog has a context-length entry
for m in _PROVIDER_MODELS["huggingface"]:
assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
```
The rule: if the test reads like a snapshot of current data, delete it. If
it reads like a contract about how two pieces of data must relate, keep it.
When a PR adds a new provider/model and you want a test, make the test
assert the relationship (e.g. "catalog entries all have context lengths"),
not the specific names.
Reviewers should reject new change-detector tests; authors should convert
them into invariants before re-requesting review.

View file

@ -63,6 +63,9 @@ def make_approval_callback(
logger.warning("Permission request timed out or failed: %s", exc)
return "deny"
if response is None:
return "deny"
outcome = response.outcome
if isinstance(outcome, AllowedOutcome):
option_id = outcome.option_id

View file

@ -4,6 +4,7 @@ from __future__ import annotations
import asyncio
import logging
import os
from collections import defaultdict, deque
from concurrent.futures import ThreadPoolExecutor
from typing import Any, Deque, Optional
@ -51,7 +52,7 @@ try:
except ImportError:
from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined]
from acp_adapter.auth import detect_provider, has_provider
from acp_adapter.auth import detect_provider
from acp_adapter.events import (
make_message_cb,
make_step_cb,
@ -71,6 +72,11 @@ except Exception:
# Thread pool for running AIAgent (synchronous) in parallel.
_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")
# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
# does not expose a client-side limit, so this is a fixed cap that clients
# paginate against using `cursor` / `next_cursor`.
_LIST_SESSIONS_PAGE_SIZE = 50
def _extract_text(
prompt: list[
@ -351,9 +357,18 @@ class HermesACPAgent(acp.Agent):
)
async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
if has_provider():
return AuthenticateResponse()
return None
# Only accept authenticate() calls whose method_id matches the
# provider we advertised in initialize(). Without this check,
# authenticate() would acknowledge any method_id as long as the
# server has provider credentials configured — harmless under
# Hermes' threat model (ACP is stdio-only, local-trust), but poor
# API hygiene and confusing if ACP ever grows multi-method auth.
provider = detect_provider()
if not provider:
return None
if not isinstance(method_id, str) or method_id.strip().lower() != provider:
return None
return AuthenticateResponse()
# ---- Session management -------------------------------------------------
@ -437,7 +452,28 @@ class HermesACPAgent(acp.Agent):
cwd: str | None = None,
**kwargs: Any,
) -> ListSessionsResponse:
"""List ACP sessions with optional ``cwd`` filtering and cursor pagination.
``cwd`` is passed through to ``SessionManager.list_sessions`` which already
normalizes and filters by working directory. ``cursor`` is a ``session_id``
previously returned as ``next_cursor``; results resume after that entry.
Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
results remain, ``next_cursor`` is set to the last returned ``session_id``.
"""
infos = self.session_manager.list_sessions(cwd=cwd)
if cursor:
for idx, s in enumerate(infos):
if s["session_id"] == cursor:
infos = infos[idx + 1:]
break
else:
# Unknown cursor -> empty page (do not fall back to full list).
infos = []
has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
sessions = []
for s in infos:
updated_at = s.get("updated_at")
@ -451,7 +487,9 @@ class HermesACPAgent(acp.Agent):
updated_at=updated_at,
)
)
return ListSessionsResponse(sessions=sessions)
next_cursor = sessions[-1].session_id if has_more and sessions else None
return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
# ---- Prompt (core) ------------------------------------------------------
@ -517,15 +555,32 @@ class HermesACPAgent(acp.Agent):
agent.step_callback = step_cb
agent.message_callback = message_cb
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
# Set it INSIDE _run_agent so the TLS write happens in the executor
# thread — setting it here would write to the event-loop thread's TLS,
# not the executor's. Also set HERMES_INTERACTIVE so approval.py
# takes the CLI-interactive path (which calls the registered
# callback via prompt_dangerous_approval) instead of the
# non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
# ACP's conn.request_permission maps cleanly to the interactive
# callback shape — not the gateway-queue HERMES_EXEC_ASK path,
# which requires a notify_cb registered in _gateway_notify_cbs.
previous_approval_cb = None
previous_interactive = None
def _run_agent() -> dict:
nonlocal previous_approval_cb, previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool
previous_approval_cb = _terminal_tool._get_approval_callback()
_terminal_tool.set_approval_callback(approval_cb)
except Exception:
logger.debug("Could not set ACP approval callback", exc_info=True)
# Signal to tools.approval that we have an interactive callback
# and the non-interactive auto-approve path must not fire.
previous_interactive = os.environ.get("HERMES_INTERACTIVE")
os.environ["HERMES_INTERACTIVE"] = "1"
try:
result = agent.run_conversation(
user_message=user_text,
@ -537,6 +592,11 @@ class HermesACPAgent(acp.Agent):
logger.exception("Agent error in session %s", session_id)
return {"final_response": f"Error: {e}", "messages": state.history}
finally:
# Restore HERMES_INTERACTIVE.
if previous_interactive is None:
os.environ.pop("HERMES_INTERACTIVE", None)
else:
os.environ["HERMES_INTERACTIVE"] = previous_interactive
if approval_cb:
try:
from tools import terminal_tool as _terminal_tool

326
agent/account_usage.py Normal file
View file

@ -0,0 +1,326 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Optional
import httpx
from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
from hermes_cli.runtime_provider import resolve_runtime_provider
def _utc_now() -> datetime:
return datetime.now(timezone.utc)
@dataclass(frozen=True)
class AccountUsageWindow:
label: str
used_percent: Optional[float] = None
reset_at: Optional[datetime] = None
detail: Optional[str] = None
@dataclass(frozen=True)
class AccountUsageSnapshot:
provider: str
source: str
fetched_at: datetime
title: str = "Account limits"
plan: Optional[str] = None
windows: tuple[AccountUsageWindow, ...] = ()
details: tuple[str, ...] = ()
unavailable_reason: Optional[str] = None
@property
def available(self) -> bool:
return bool(self.windows or self.details) and not self.unavailable_reason
def _title_case_slug(value: Optional[str]) -> Optional[str]:
cleaned = str(value or "").strip()
if not cleaned:
return None
return cleaned.replace("_", " ").replace("-", " ").title()
def _parse_dt(value: Any) -> Optional[datetime]:
if value in (None, ""):
return None
if isinstance(value, (int, float)):
return datetime.fromtimestamp(float(value), tz=timezone.utc)
if isinstance(value, str):
text = value.strip()
if not text:
return None
if text.endswith("Z"):
text = text[:-1] + "+00:00"
try:
dt = datetime.fromisoformat(text)
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
except ValueError:
return None
return None
def _format_reset(dt: Optional[datetime]) -> str:
if not dt:
return "unknown"
local_dt = dt.astimezone()
delta = dt - _utc_now()
total_seconds = int(delta.total_seconds())
if total_seconds <= 0:
return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
hours, rem = divmod(total_seconds, 3600)
minutes = rem // 60
if hours >= 24:
days, hours = divmod(hours, 24)
rel = f"in {days}d {hours}h"
elif hours > 0:
rel = f"in {hours}h {minutes}m"
else:
rel = f"in {minutes}m"
return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
if not snapshot:
return []
header = f"📈 {'**' if markdown else ''}{snapshot.title}{'**' if markdown else ''}"
lines = [header]
if snapshot.plan:
lines.append(f"Provider: {snapshot.provider} ({snapshot.plan})")
else:
lines.append(f"Provider: {snapshot.provider}")
for window in snapshot.windows:
if window.used_percent is None:
base = f"{window.label}: unavailable"
else:
remaining = max(0, round(100 - float(window.used_percent)))
used = max(0, round(float(window.used_percent)))
base = f"{window.label}: {remaining}% remaining ({used}% used)"
if window.reset_at:
base += f" • resets {_format_reset(window.reset_at)}"
elif window.detail:
base += f"{window.detail}"
lines.append(base)
for detail in snapshot.details:
lines.append(detail)
if snapshot.unavailable_reason:
lines.append(f"Unavailable: {snapshot.unavailable_reason}")
return lines
def _resolve_codex_usage_url(base_url: str) -> str:
normalized = (base_url or "").strip().rstrip("/")
if not normalized:
normalized = "https://chatgpt.com/backend-api/codex"
if normalized.endswith("/codex"):
normalized = normalized[: -len("/codex")]
if "/backend-api" in normalized:
return normalized + "/wham/usage"
return normalized + "/api/codex/usage"
def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
token_data = _read_codex_tokens()
tokens = token_data.get("tokens") or {}
account_id = str(tokens.get("account_id", "") or "").strip() or None
headers = {
"Authorization": f"Bearer {creds['api_key']}",
"Accept": "application/json",
"User-Agent": "codex-cli",
}
if account_id:
headers["ChatGPT-Account-Id"] = account_id
with httpx.Client(timeout=15.0) as client:
response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
response.raise_for_status()
payload = response.json() or {}
rate_limit = payload.get("rate_limit") or {}
windows: list[AccountUsageWindow] = []
for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
window = rate_limit.get(key) or {}
used = window.get("used_percent")
if used is None:
continue
windows.append(
AccountUsageWindow(
label=label,
used_percent=float(used),
reset_at=_parse_dt(window.get("reset_at")),
)
)
details: list[str] = []
credits = payload.get("credits") or {}
if credits.get("has_credits"):
balance = credits.get("balance")
if isinstance(balance, (int, float)):
details.append(f"Credits balance: ${float(balance):.2f}")
elif credits.get("unlimited"):
details.append("Credits balance: unlimited")
return AccountUsageSnapshot(
provider="openai-codex",
source="usage_api",
fetched_at=_utc_now(),
plan=_title_case_slug(payload.get("plan_type")),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
token = (resolve_anthropic_token() or "").strip()
if not token:
return None
if not _is_oauth_token(token):
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
)
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
"Content-Type": "application/json",
"anthropic-beta": "oauth-2025-04-20",
"User-Agent": "claude-code/2.1.0",
}
with httpx.Client(timeout=15.0) as client:
response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
response.raise_for_status()
payload = response.json() or {}
windows: list[AccountUsageWindow] = []
mapping = (
("five_hour", "Current session"),
("seven_day", "Current week"),
("seven_day_opus", "Opus week"),
("seven_day_sonnet", "Sonnet week"),
)
for key, label in mapping:
window = payload.get(key) or {}
util = window.get("utilization")
if util is None:
continue
used = float(util) * 100 if float(util) <= 1 else float(util)
windows.append(
AccountUsageWindow(
label=label,
used_percent=used,
reset_at=_parse_dt(window.get("resets_at")),
)
)
details: list[str] = []
extra = payload.get("extra_usage") or {}
if extra.get("is_enabled"):
used_credits = extra.get("used_credits")
monthly_limit = extra.get("monthly_limit")
currency = extra.get("currency") or "USD"
if isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float)):
details.append(
f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
)
return AccountUsageSnapshot(
provider="anthropic",
source="oauth_usage_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
runtime = resolve_runtime_provider(
requested="openrouter",
explicit_base_url=base_url,
explicit_api_key=api_key,
)
token = str(runtime.get("api_key", "") or "").strip()
if not token:
return None
normalized = str(runtime.get("base_url", "") or "").rstrip("/")
credits_url = f"{normalized}/credits"
key_url = f"{normalized}/key"
headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/json",
}
with httpx.Client(timeout=10.0) as client:
credits_resp = client.get(credits_url, headers=headers)
credits_resp.raise_for_status()
credits = (credits_resp.json() or {}).get("data") or {}
try:
key_resp = client.get(key_url, headers=headers)
key_resp.raise_for_status()
key_data = (key_resp.json() or {}).get("data") or {}
except Exception:
key_data = {}
total_credits = float(credits.get("total_credits") or 0.0)
total_usage = float(credits.get("total_usage") or 0.0)
details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]
windows: list[AccountUsageWindow] = []
limit = key_data.get("limit")
limit_remaining = key_data.get("limit_remaining")
limit_reset = str(key_data.get("limit_reset") or "").strip()
usage = key_data.get("usage")
if (
isinstance(limit, (int, float))
and float(limit) > 0
and isinstance(limit_remaining, (int, float))
and 0 <= float(limit_remaining) <= float(limit)
):
limit_value = float(limit)
remaining_value = float(limit_remaining)
used_percent = ((limit_value - remaining_value) / limit_value) * 100
detail_parts = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
if limit_reset:
detail_parts.append(f"resets {limit_reset}")
windows.append(
AccountUsageWindow(
label="API key quota",
used_percent=used_percent,
detail="".join(detail_parts),
)
)
if isinstance(usage, (int, float)):
usage_parts = [f"API key usage: ${float(usage):.2f} total"]
for value, label in (
(key_data.get("usage_daily"), "today"),
(key_data.get("usage_weekly"), "this week"),
(key_data.get("usage_monthly"), "this month"),
):
if isinstance(value, (int, float)) and float(value) > 0:
usage_parts.append(f"${float(value):.2f} {label}")
details.append("".join(usage_parts))
return AccountUsageSnapshot(
provider="openrouter",
source="credits_api",
fetched_at=_utc_now(),
windows=tuple(windows),
details=tuple(details),
)
def fetch_account_usage(
provider: Optional[str],
*,
base_url: Optional[str] = None,
api_key: Optional[str] = None,
) -> Optional[AccountUsageSnapshot]:
normalized = str(provider or "").strip().lower()
if normalized in {"", "auto", "custom"}:
return None
try:
if normalized == "openai-codex":
return _fetch_codex_account_usage()
if normalized == "anthropic":
return _fetch_anthropic_account_usage()
if normalized == "openrouter":
return _fetch_openrouter_account_usage(base_url, api_key)
except Exception:
return None
return None

View file

@ -19,6 +19,7 @@ from pathlib import Path
from hermes_constants import get_hermes_home
from types import SimpleNamespace
from typing import Any, Dict, List, Optional, Tuple
from utils import normalize_proxy_env_vars
try:
import anthropic as _anthropic_sdk
@ -308,6 +309,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional
"The 'anthropic' package is required for the Anthropic provider. "
"Install it with: pip install 'anthropic>=0.39.0'"
)
normalize_proxy_env_vars()
from httpx import Timeout
normalized_base_url = _normalize_base_url_text(base_url)
@ -1525,3 +1529,42 @@ def normalize_anthropic_response(
),
finish_reason,
)
def normalize_anthropic_response_v2(
response,
strip_tool_prefix: bool = False,
) -> "NormalizedResponse":
"""Normalize Anthropic response to NormalizedResponse.
Wraps the existing normalize_anthropic_response() and maps its output
to the shared transport types. This allows incremental migration
one call site at a time without changing the original function.
"""
from agent.transports.types import NormalizedResponse, build_tool_call
assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)
tool_calls = None
if assistant_msg.tool_calls:
tool_calls = [
build_tool_call(
id=tc.id,
name=tc.function.name,
arguments=tc.function.arguments,
)
for tc in assistant_msg.tool_calls
]
provider_data = {}
if getattr(assistant_msg, "reasoning_details", None):
provider_data["reasoning_details"] = assistant_msg.reasoning_details
return NormalizedResponse(
content=assistant_msg.content,
tool_calls=tool_calls,
finish_reason=finish_reason,
reasoning=getattr(assistant_msg, "reasoning", None),
usage=None, # Anthropic usage is on the raw response, not the normaliser
provider_data=provider_data or None,
)

View file

@ -51,7 +51,7 @@ if TYPE_CHECKING:
from agent.credential_pool import load_pool
from hermes_cli.config import get_hermes_home
from hermes_constants import OPENROUTER_BASE_URL
from utils import base_url_host_matches, base_url_hostname
from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars
logger = logging.getLogger(__name__)
@ -1036,6 +1036,8 @@ def _validate_proxy_env_urls() -> None:
"""
from urllib.parse import urlparse
normalize_proxy_env_vars()
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = str(os.environ.get(key) or "").strip()

View file

@ -21,6 +21,9 @@ from pathlib import Path
from types import SimpleNamespace
from typing import Any
from agent.file_safety import get_read_block_error, is_write_denied
from agent.redact import redact_sensitive_text
ACP_MARKER_BASE_URL = "acp://copilot"
_DEFAULT_TIMEOUT_SECONDS = 900.0
@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
}
def _permission_denied(message_id: Any) -> dict[str, Any]:
return {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "cancelled",
}
},
}
def _format_messages_as_prompt(
messages: list[dict[str, Any]],
model: str | None = None,
@ -535,18 +550,13 @@ class CopilotACPClient:
params = msg.get("params") or {}
if method == "session/request_permission":
response = {
"jsonrpc": "2.0",
"id": message_id,
"result": {
"outcome": {
"outcome": "allow_once",
}
},
}
response = _permission_denied(message_id)
elif method == "fs/read_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
block_error = get_read_block_error(str(path))
if block_error:
raise PermissionError(block_error)
content = path.read_text() if path.exists() else ""
line = params.get("line")
limit = params.get("limit")
@ -555,6 +565,8 @@ class CopilotACPClient:
start = line - 1
end = start + limit if isinstance(limit, int) and limit > 0 else None
content = "".join(lines[start:end])
if content:
content = redact_sensitive_text(content)
response = {
"jsonrpc": "2.0",
"id": message_id,
@ -567,6 +579,10 @@ class CopilotACPClient:
elif method == "fs/write_text_file":
try:
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
if is_write_denied(str(path)):
raise PermissionError(
f"Write denied: '{path}' is a protected system/credential file."
)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text(str(params.get("content") or ""))
response = {

View file

@ -998,6 +998,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
active_sources: Set[str] = set()
auth_store = _load_auth_store()
# Shared suppression gate — used at every upsert site so
# `hermes auth remove <provider> <N>` is stable across all source types.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "anthropic":
# Only auto-discover external credentials (Claude Code, Hermes PKCE)
# when the user has explicitly configured anthropic as their provider.
@ -1017,13 +1025,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
("claude_code", read_claude_code_credentials()),
):
if creds and creds.get("accessToken"):
# Check if user explicitly removed this source
try:
from hermes_cli.auth import is_source_suppressed
if is_source_suppressed(provider, source_name):
continue
except ImportError:
pass
if _is_suppressed(provider, source_name):
continue
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
@ -1041,7 +1044,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
elif provider == "nous":
state = _load_provider_state(auth_store, "nous")
if state:
if state and not _is_suppressed(provider, "device_code"):
active_sources.add("device_code")
# Prefer a user-supplied label embedded in the singleton state
# (set by persist_nous_credentials(label=...) when the user ran
@ -1082,20 +1085,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
token, source = resolve_copilot_token()
if token:
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
pconfig = PROVIDER_REGISTRY.get(provider)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": token,
"base_url": pconfig.inference_base_url if pconfig else "",
"label": source,
},
)
except Exception as exc:
logger.debug("Copilot token seed failed: %s", exc)
@ -1111,20 +1115,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
token = creds.get("api_key", "")
if token:
source_name = creds.get("source", "qwen-cli")
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
if not _is_suppressed(provider, source_name):
active_sources.add(source_name)
changed |= _upsert_entry(
entries,
provider,
source_name,
{
"source": source_name,
"auth_type": AUTH_TYPE_OAUTH,
"access_token": token,
"expires_at_ms": creds.get("expires_at_ms"),
"base_url": creds.get("base_url", ""),
"label": creds.get("auth_file", source_name),
},
)
except Exception as exc:
logger.debug("Qwen OAuth token seed failed: %s", exc)
@ -1133,13 +1138,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
# the device_code source as suppressed so it won't be re-seeded from
# the Hermes auth store. Without this gate the removal is instantly
# undone on the next load_pool() call.
codex_suppressed = False
try:
from hermes_cli.auth import is_source_suppressed
codex_suppressed = is_source_suppressed(provider, "device_code")
except ImportError:
pass
if codex_suppressed:
if _is_suppressed(provider, "device_code"):
return changed, active_sources
state = _load_provider_state(auth_store, "openai-codex")
@ -1173,10 +1172,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
changed = False
active_sources: Set[str] = set()
# Honour user suppression — `hermes auth remove <provider> <N>` for an
# env-seeded credential marks the env:<VAR> source as suppressed so it
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
# Without this gate the removal is silently undone on the next
# load_pool() call whenever the var is still exported by the shell.
try:
from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
except ImportError:
def _is_source_suppressed(_p, _s): # type: ignore[misc]
return False
if provider == "openrouter":
token = os.getenv("OPENROUTER_API_KEY", "").strip()
if token:
source = "env:OPENROUTER_API_KEY"
if _is_source_suppressed(provider, source):
return changed, active_sources
active_sources.add(source)
changed |= _upsert_entry(
entries,
@ -1213,6 +1224,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
if not token:
continue
source = f"env:{env_var}"
if _is_source_suppressed(provider, source):
continue
active_sources.add(source)
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
base_url = env_url or pconfig.inference_base_url
@ -1257,6 +1270,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
changed = False
active_sources: Set[str] = set()
# Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
try:
from hermes_cli.auth import is_source_suppressed as _is_suppressed
except ImportError:
def _is_suppressed(_p, _s): # type: ignore[misc]
return False
# Seed from the custom_providers config entry's api_key field
cp_config = _get_custom_provider_config(pool_key)
if cp_config:
@ -1265,19 +1285,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
name = str(cp_config.get("name") or "").strip()
if api_key:
source = f"config:{name}"
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": api_key,
"base_url": base_url,
"label": name or source,
},
)
# Seed from model.api_key if model.provider=='custom' and model.base_url matches
try:
@ -1297,19 +1318,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
matched_key = get_custom_provider_pool_key(model_base_url)
if matched_key == pool_key:
source = "model_config"
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
if not _is_suppressed(pool_key, source):
active_sources.add(source)
changed |= _upsert_entry(
entries,
pool_key,
source,
{
"source": source,
"auth_type": AUTH_TYPE_API_KEY,
"access_token": model_api_key,
"base_url": model_base_url,
"label": "model_config",
},
)
except Exception:
pass

401
agent/credential_sources.py Normal file
View file

@ -0,0 +1,401 @@
"""Unified removal contract for every credential source Hermes reads from.
Hermes seeds its credential pool from many places:
env:<VAR> os.environ / ~/.hermes/.env
claude_code ~/.claude/.credentials.json
hermes_pkce ~/.hermes/.anthropic_oauth.json
device_code auth.json providers.<provider> (nous, openai-codex, ...)
qwen-cli ~/.qwen/oauth_creds.json
gh_cli gh auth token
config:<name> custom_providers config entry
model_config model.api_key when model.provider == "custom"
manual user ran `hermes auth add`
Each source has its own reader inside ``agent.credential_pool._seed_from_*``
(which keep their existing shape we haven't restructured them). What we
unify here is **removal**:
``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
Before this module, every source had an ad-hoc removal branch in
``auth_remove_command``, and several sources had no branch at all so
``auth remove`` silently reverted on the next ``load_pool()`` call for
qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
custom-config sources.
Now every source registers a ``RemovalStep`` that does exactly three things
in the same shape:
1. Clean up whatever externally-readable state the source reads from
(.env line, auth.json block, OAuth file, etc.)
2. Suppress the ``(provider, source_id)`` in auth.json so the
corresponding ``_seed_from_*`` branch skips the upsert on re-load
3. Return ``RemovalResult`` describing what was cleaned and any
diagnostic hints the user should see (shell-exported env vars,
external credential files we deliberately don't delete, etc.)
Adding a new credential source is:
- wire up a reader branch in ``_seed_from_*`` (existing pattern)
- gate that reader behind ``is_source_suppressed(provider, source_id)``
- register a ``RemovalStep`` here
No more per-source if/elif chain in ``auth_remove_command``.
"""
from __future__ import annotations
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import Callable, List, Optional
@dataclass
class RemovalResult:
"""Outcome of removing a credential source.
Attributes:
cleaned: Short strings describing external state that was actually
mutated (``"Cleared XAI_API_KEY from .env"``,
``"Cleared openai-codex OAuth tokens from auth store"``).
Printed as plain lines to the user.
hints: Diagnostic lines ABOUT state the user may need to clean up
themselves or is deliberately left intact (shell-exported env
var, Claude Code credential file we don't delete, etc.).
Printed as plain lines to the user. Always non-destructive.
suppress: Whether to call ``suppress_credential_source`` after
cleanup so future ``load_pool`` calls skip this source.
Default True almost every source needs this to stay sticky.
The only legitimate False is ``manual`` entries, which aren't
seeded from anywhere external.
"""
cleaned: List[str] = field(default_factory=list)
hints: List[str] = field(default_factory=list)
suppress: bool = True
@dataclass
class RemovalStep:
"""How to remove one specific credential source cleanly.
Attributes:
provider: Provider pool key (``"xai"``, ``"anthropic"``, ``"nous"``, ...).
Special value ``"*"`` means "matches any provider" used for
sources like ``manual`` that aren't provider-specific.
source_id: Source identifier as it appears in
``PooledCredential.source``. May be a literal (``"claude_code"``)
or a prefix pattern matched via ``match_fn``.
match_fn: Optional predicate overriding literal ``source_id``
matching. Gets the removed entry's source string. Used for
``env:*`` (any env-seeded key), ``config:*`` (any custom
pool), and ``manual:*`` (any manual-source variant).
remove_fn: ``(provider, removed_entry) -> RemovalResult``. Does the
actual cleanup and returns what happened for the user.
description: One-line human-readable description for docs / tests.
"""
provider: str
source_id: str
remove_fn: Callable[..., RemovalResult]
match_fn: Optional[Callable[[str], bool]] = None
description: str = ""
def matches(self, provider: str, source: str) -> bool:
if self.provider != "*" and self.provider != provider:
return False
if self.match_fn is not None:
return self.match_fn(source)
return source == self.source_id
_REGISTRY: List[RemovalStep] = []
def register(step: RemovalStep) -> RemovalStep:
_REGISTRY.append(step)
return step
def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
"""Return the first matching RemovalStep, or None if unregistered.
Unregistered sources fall through to the default remove path in
``auth_remove_command``: the pool entry is already gone (that happens
before dispatch), no external cleanup, no suppression. This is the
correct behaviour for ``manual`` entries they were only ever stored
in the pool, nothing external to clean up.
"""
for step in _REGISTRY:
if step.matches(provider, source):
return step
return None
# ---------------------------------------------------------------------------
# Individual RemovalStep implementations — one per source.
# ---------------------------------------------------------------------------
# Each remove_fn is intentionally small and single-purpose. Adding a new
# credential source means adding ONE entry here — no other changes to
# auth_remove_command.
def _remove_env_source(provider: str, removed) -> RemovalResult:
"""env:<VAR> — the most common case.
Handles three user situations:
1. Var lives only in ~/.hermes/.env clear it
2. Var lives only in the user's shell (shell profile, systemd
EnvironmentFile, launchd plist) hint them where to unset it
3. Var lives in both clear from .env, hint about shell
"""
from hermes_cli.config import get_env_path, remove_env_value
result = RemovalResult()
env_var = removed.source[len("env:"):]
if not env_var:
return result
# Detect shell vs .env BEFORE remove_env_value pops os.environ.
env_in_process = bool(os.getenv(env_var))
env_in_dotenv = False
try:
env_path = get_env_path()
if env_path.exists():
env_in_dotenv = any(
line.strip().startswith(f"{env_var}=")
for line in env_path.read_text(errors="replace").splitlines()
)
except OSError:
pass
shell_exported = env_in_process and not env_in_dotenv
cleared = remove_env_value(env_var)
if cleared:
result.cleaned.append(f"Cleared {env_var} from .env")
if shell_exported:
result.hints.extend([
f"Note: {env_var} is still set in your shell environment "
f"(not in ~/.hermes/.env).",
" Unset it there (shell profile, systemd EnvironmentFile, "
"launchd plist, etc.) or it will keep being visible to Hermes.",
f" The pool entry is now suppressed — Hermes will ignore "
f"{env_var} until you run `hermes auth add {provider}`.",
])
else:
result.hints.append(
f"Suppressed env:{env_var} — it will not be re-seeded even "
f"if the variable is re-exported later."
)
return result
def _remove_claude_code(provider: str, removed) -> RemovalResult:
"""~/.claude/.credentials.json is owned by Claude Code itself.
We don't delete it — the user's Claude Code install still needs to
work. We just suppress it so Hermes stops reading it.
"""
return RemovalResult(hints=[
"Suppressed claude_code credential — it will not be re-seeded.",
"Note: Claude Code credentials still live in ~/.claude/.credentials.json",
"Run `hermes auth add anthropic` to re-enable if needed.",
])
def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
"""~/.hermes/.anthropic_oauth.json is ours — delete it outright."""
from hermes_constants import get_hermes_home
result = RemovalResult()
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
if oauth_file.exists():
try:
oauth_file.unlink()
result.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
except OSError as exc:
result.hints.append(f"Could not delete {oauth_file}: {exc}")
return result
def _clear_auth_store_provider(provider: str) -> bool:
"""Delete auth_store.providers[provider]. Returns True if deleted."""
from hermes_cli.auth import (
_auth_store_lock,
_load_auth_store,
_save_auth_store,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
return True
return False
def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
"""Nous OAuth lives in auth.json providers.nous — clear it and suppress.
We suppress in addition to clearing because nothing else stops the
user's next `hermes login` run from writing providers.nous again
before they decide to. Suppression forces them to go through
`hermes auth add nous` to re-engage, which is the documented re-add
path and clears the suppression atomically.
"""
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
return result
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
refresh_codex_oauth_pure() writes both every time, so clearing only
the Hermes auth store is not enough _seed_from_singletons() would
re-import from ~/.codex/auth.json on the next load_pool() call and
the removal would be instantly undone. We suppress instead of
deleting Codex CLI's file, so the Codex CLI itself keeps working.
The canonical source name in ``_seed_from_singletons`` is
``"device_code"`` (no prefix). Entries may show up in the pool as
either ``"device_code"`` (seeded) or ``"manual:device_code"`` (added
via ``hermes auth add openai-codex``), but in both cases the re-seed
gate lives at the ``"device_code"`` suppression key. We suppress
that canonical key here; the central dispatcher also suppresses
``removed.source`` which is fine belt-and-suspenders, idempotent.
"""
from hermes_cli.auth import suppress_credential_source
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
# Suppress the canonical re-seed source, not just whatever source the
# removed entry had. Otherwise `manual:device_code` removals wouldn't
# block the `device_code` re-seed path.
suppress_credential_source(provider, "device_code")
result.hints.extend([
"Suppressed openai-codex device_code source — it will not be re-seeded.",
"Note: Codex CLI credentials still live in ~/.codex/auth.json",
"Run `hermes auth add openai-codex` to re-enable if needed.",
])
return result
def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
"""~/.qwen/oauth_creds.json is owned by the Qwen CLI.
Same pattern as claude_code suppress, don't delete. The user's
Qwen CLI install still reads from that file.
"""
return RemovalResult(hints=[
"Suppressed qwen-cli credential — it will not be re-seeded.",
"Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
"Run `hermes auth add qwen-oauth` to re-enable if needed.",
])
def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
"""Copilot token comes from `gh auth token` or COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
Copilot is special: the same token can be seeded as multiple source
entries (gh_cli from ``_seed_from_singletons`` plus env:<VAR> from
``_seed_from_env``), so removing one entry without suppressing the
others lets the duplicates resurrect. We suppress ALL known copilot
sources here so removal is stable regardless of which entry the
user clicked.
We don't touch the user's gh CLI or shell state just suppress so
Hermes stops picking the token up.
"""
# Suppress ALL copilot source variants up-front so no path resurrects
# the pool entry. The central dispatcher in auth_remove_command will
# ALSO suppress removed.source, but it's idempotent so double-calling
# is harmless.
from hermes_cli.auth import suppress_credential_source
suppress_credential_source(provider, "gh_cli")
for env_var in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"):
suppress_credential_source(provider, f"env:{env_var}")
return RemovalResult(hints=[
"Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
"Note: Your gh CLI / shell environment is unchanged.",
"Run `hermes auth add copilot` to re-enable if needed.",
])
def _remove_custom_config(provider: str, removed) -> RemovalResult:
"""Custom provider pools are seeded from custom_providers config or
model.api_key. Both are in config.yaml modifying that from here
is more invasive than suppression. We suppress; the user can edit
config.yaml if they want to remove the key from disk entirely.
"""
source_label = removed.source
return RemovalResult(hints=[
f"Suppressed {source_label} — it will not be re-seeded.",
"Note: The underlying value in config.yaml is unchanged. Edit it "
"directly if you want to remove the credential from disk.",
])
def _register_all_sources() -> None:
"""Called once on module import.
ORDER MATTERS ``find_removal_step`` returns the first match. Put
provider-specific steps before the generic ``env:*`` step so that e.g.
copilot's ``env:GH_TOKEN`` goes through the copilot removal (which
doesn't touch the user's shell), not the generic env-var removal
(which would try to clear .env).
"""
register(RemovalStep(
provider="copilot", source_id="gh_cli",
match_fn=lambda src: src == "gh_cli" or src.startswith("env:"),
remove_fn=_remove_copilot_gh,
description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
))
register(RemovalStep(
provider="*", source_id="env:",
match_fn=lambda src: src.startswith("env:"),
remove_fn=_remove_env_source,
description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
))
register(RemovalStep(
provider="anthropic", source_id="claude_code",
remove_fn=_remove_claude_code,
description="~/.claude/.credentials.json",
))
register(RemovalStep(
provider="anthropic", source_id="hermes_pkce",
remove_fn=_remove_hermes_pkce,
description="~/.hermes/.anthropic_oauth.json",
))
register(RemovalStep(
provider="nous", source_id="device_code",
remove_fn=_remove_nous_device_code,
description="auth.json providers.nous",
))
register(RemovalStep(
provider="openai-codex", source_id="device_code",
match_fn=lambda src: src == "device_code" or src.endswith(":device_code"),
remove_fn=_remove_codex_device_code,
description="auth.json providers.openai-codex + ~/.codex/auth.json",
))
register(RemovalStep(
provider="qwen-oauth", source_id="qwen-cli",
remove_fn=_remove_qwen_cli,
description="~/.qwen/oauth_creds.json",
))
register(RemovalStep(
provider="*", source_id="config:",
match_fn=lambda src: src.startswith("config:") or src == "model_config",
remove_fn=_remove_custom_config,
description="Custom provider config.yaml api_key field",
))
_register_all_sources()

111
agent/file_safety.py Normal file
View file

@ -0,0 +1,111 @@
"""Shared file safety rules used by both tools and ACP shims."""
from __future__ import annotations
import os
from pathlib import Path
from typing import Optional
def _hermes_home_path() -> Path:
"""Resolve the active HERMES_HOME (profile-aware) without circular imports."""
try:
from hermes_constants import get_hermes_home # local import to avoid cycles
return get_hermes_home()
except Exception:
return Path(os.path.expanduser("~/.hermes"))
def build_write_denied_paths(home: str) -> set[str]:
"""Return exact sensitive paths that must never be written."""
hermes_home = _hermes_home_path()
return {
os.path.realpath(p)
for p in [
os.path.join(home, ".ssh", "authorized_keys"),
os.path.join(home, ".ssh", "id_rsa"),
os.path.join(home, ".ssh", "id_ed25519"),
os.path.join(home, ".ssh", "config"),
str(hermes_home / ".env"),
os.path.join(home, ".bashrc"),
os.path.join(home, ".zshrc"),
os.path.join(home, ".profile"),
os.path.join(home, ".bash_profile"),
os.path.join(home, ".zprofile"),
os.path.join(home, ".netrc"),
os.path.join(home, ".pgpass"),
os.path.join(home, ".npmrc"),
os.path.join(home, ".pypirc"),
"/etc/sudoers",
"/etc/passwd",
"/etc/shadow",
]
}
def build_write_denied_prefixes(home: str) -> list[str]:
"""Return sensitive directory prefixes that must never be written."""
return [
os.path.realpath(p) + os.sep
for p in [
os.path.join(home, ".ssh"),
os.path.join(home, ".aws"),
os.path.join(home, ".gnupg"),
os.path.join(home, ".kube"),
"/etc/sudoers.d",
"/etc/systemd",
os.path.join(home, ".docker"),
os.path.join(home, ".azure"),
os.path.join(home, ".config", "gh"),
]
]
def get_safe_write_root() -> Optional[str]:
"""Return the resolved HERMES_WRITE_SAFE_ROOT path, or None if unset."""
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not root:
return None
try:
return os.path.realpath(os.path.expanduser(root))
except Exception:
return None
def is_write_denied(path: str) -> bool:
"""Return True if path is blocked by the write denylist or safe root."""
home = os.path.realpath(os.path.expanduser("~"))
resolved = os.path.realpath(os.path.expanduser(str(path)))
if resolved in build_write_denied_paths(home):
return True
for prefix in build_write_denied_prefixes(home):
if resolved.startswith(prefix):
return True
safe_root = get_safe_write_root()
if safe_root and not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
return True
return False
def get_read_block_error(path: str) -> Optional[str]:
"""Return an error message when a read targets internal Hermes cache files."""
resolved = Path(path).expanduser().resolve()
hermes_home = _hermes_home_path().resolve()
blocked_dirs = [
hermes_home / "skills" / ".hub" / "index-cache",
hermes_home / "skills" / ".hub",
]
for blocked in blocked_dirs:
try:
resolved.relative_to(blocked)
except ValueError:
continue
return (
f"Access denied: {path} is an internal Hermes cache file "
"and cannot be read directly to prevent prompt injection. "
"Use the skills_list or skill_view tools instead."
)
return None

View file

@ -170,6 +170,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"Qwen/Qwen3.5-35B-A3B": 131072,
"deepseek-ai/DeepSeek-V3.2": 65536,
"moonshotai/Kimi-K2.5": 262144,
"moonshotai/Kimi-K2.6": 262144,
"moonshotai/Kimi-K2-Thinking": 262144,
"MiniMaxAI/MiniMax-M2.5": 204800,
"XiaomiMiMo/MiMo-V2-Flash": 256000,

View file

@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
import json
import logging
import re
import subprocess
from datetime import datetime
from pathlib import Path
from typing import Any, Dict, Optional
@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
# left as-is so the user can debug them.
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
# Matches inline shell snippets like: !`date +%Y-%m-%d`
# Non-greedy, single-line only — no newlines inside the backticks.
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
# Cap inline-shell output so a runaway command can't blow out the context.
_INLINE_SHELL_MAX_OUTPUT = 4000
def _load_skills_config() -> dict:
"""Load the ``skills`` section of config.yaml (best-effort)."""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
skills_cfg = cfg.get("skills")
if isinstance(skills_cfg, dict):
return skills_cfg
except Exception:
logger.debug("Could not read skills config", exc_info=True)
return {}
def _substitute_template_vars(
content: str,
skill_dir: Path | None,
session_id: str | None,
) -> str:
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
Only substitutes tokens for which a concrete value is available
unresolved tokens are left in place so the author can spot them.
"""
if not content:
return content
skill_dir_str = str(skill_dir) if skill_dir else None
def _replace(match: re.Match) -> str:
token = match.group(1)
if token == "HERMES_SKILL_DIR" and skill_dir_str:
return skill_dir_str
if token == "HERMES_SESSION_ID" and session_id:
return str(session_id)
return match.group(0)
return _SKILL_TEMPLATE_RE.sub(_replace, content)
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
"""Execute a single inline-shell snippet and return its stdout (trimmed).
Failures return a short ``[inline-shell error: ...]`` marker instead of
raising, so one bad snippet can't wreck the whole skill message.
"""
try:
completed = subprocess.run(
["bash", "-c", command],
cwd=str(cwd) if cwd else None,
capture_output=True,
text=True,
timeout=max(1, int(timeout)),
check=False,
)
except subprocess.TimeoutExpired:
return f"[inline-shell timeout after {timeout}s: {command}]"
except FileNotFoundError:
return f"[inline-shell error: bash not found]"
except Exception as exc:
return f"[inline-shell error: {exc}]"
output = (completed.stdout or "").rstrip("\n")
if not output and completed.stderr:
output = completed.stderr.rstrip("\n")
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
return output
def _expand_inline_shell(
content: str,
skill_dir: Path | None,
timeout: int,
) -> str:
"""Replace every !`cmd` snippet in ``content`` with its stdout.
Runs each snippet with the skill directory as CWD so relative paths in
the snippet work the way the author expects.
"""
if "!`" not in content:
return content
def _replace(match: re.Match) -> str:
cmd = match.group(1).strip()
if not cmd:
return ""
return _run_inline_shell(cmd, skill_dir, timeout)
return _INLINE_SHELL_RE.sub(_replace, content)
def build_plan_path(
user_instruction: str = "",
@ -133,14 +238,36 @@ def _build_skill_message(
activation_note: str,
user_instruction: str = "",
runtime_note: str = "",
session_id: str | None = None,
) -> str:
"""Format a loaded skill into a user/system message payload."""
from tools.skills_tool import SKILLS_DIR
content = str(loaded_skill.get("content") or "")
# ── Template substitution and inline-shell expansion ──
# Done before anything else so downstream blocks (setup notes,
# supporting-file hints) see the expanded content.
skills_cfg = _load_skills_config()
if skills_cfg.get("template_vars", True):
content = _substitute_template_vars(content, skill_dir, session_id)
if skills_cfg.get("inline_shell", False):
timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
content = _expand_inline_shell(content, skill_dir, timeout)
parts = [activation_note, "", content.strip()]
# ── Inject the absolute skill directory so the agent can reference
# bundled scripts without an extra skill_view() round-trip. ──
if skill_dir:
parts.append("")
parts.append(f"[Skill directory: {skill_dir}]")
parts.append(
"Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
"`templates/config.yaml`) against that directory, then run them "
"with the terminal tool using the absolute path."
)
# ── Inject resolved skill config values ──
_inject_skill_config(loaded_skill, parts)
@ -188,11 +315,13 @@ def _build_skill_message(
# Skill is from an external dir — use the skill name instead
skill_view_target = skill_dir.name
parts.append("")
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
parts.append("[This skill has supporting files:]")
for sf in supporting:
parts.append(f"- {sf}")
parts.append(f"- {sf} -> {skill_dir / sf}")
parts.append(
f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
f'\nLoad any of these with skill_view(name="{skill_view_target}", '
f'file_path="<path>"), or run scripts directly by absolute path '
f"(e.g. `node {skill_dir}/scripts/foo.js`)."
)
if user_instruction:
@ -332,6 +461,7 @@ def build_skill_invocation_message(
activation_note,
user_instruction=user_instruction,
runtime_note=runtime_note,
session_id=task_id,
)
@ -370,6 +500,7 @@ def build_preloaded_skills_prompt(
loaded_skill,
skill_dir,
activation_note,
session_id=task_id,
)
)
loaded_names.append(skill_name)

View file

@ -0,0 +1,39 @@
"""Transport layer types and registry for provider response normalization.
Usage:
from agent.transports import get_transport
transport = get_transport("anthropic_messages")
result = transport.normalize_response(raw_response)
"""
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
_REGISTRY: dict = {}
def register_transport(api_mode: str, transport_cls: type) -> None:
"""Register a transport class for an api_mode string."""
_REGISTRY[api_mode] = transport_cls
def get_transport(api_mode: str):
"""Get a transport instance for the given api_mode.
Returns None if no transport is registered for this api_mode.
This allows gradual migration call sites can check for None
and fall back to the legacy code path.
"""
if not _REGISTRY:
_discover_transports()
cls = _REGISTRY.get(api_mode)
if cls is None:
return None
return cls()
def _discover_transports() -> None:
"""Import all transport modules to trigger auto-registration."""
try:
import agent.transports.anthropic # noqa: F401
except ImportError:
pass

View file

@ -0,0 +1,129 @@
"""Anthropic Messages API transport.
Delegates to the existing adapter functions in agent/anthropic_adapter.py.
This transport owns format conversion and normalization NOT client lifecycle.
"""
from typing import Any, Dict, List, Optional
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse
class AnthropicTransport(ProviderTransport):
"""Transport for api_mode='anthropic_messages'.
Wraps the existing functions in anthropic_adapter.py behind the
ProviderTransport ABC. Each method delegates no logic is duplicated.
"""
@property
def api_mode(self) -> str:
return "anthropic_messages"
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI messages to Anthropic (system, messages) tuple.
kwargs:
base_url: Optional[str] affects thinking signature handling.
"""
from agent.anthropic_adapter import convert_messages_to_anthropic
base_url = kwargs.get("base_url")
return convert_messages_to_anthropic(messages, base_url=base_url)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Anthropic input_schema format."""
from agent.anthropic_adapter import convert_tools_to_anthropic
return convert_tools_to_anthropic(tools)
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build Anthropic messages.create() kwargs.
Calls convert_messages and convert_tools internally.
params (all optional):
max_tokens: int
reasoning_config: dict | None
tool_choice: str | None
is_oauth: bool
preserve_dots: bool
context_length: int | None
base_url: str | None
fast_mode: bool
"""
from agent.anthropic_adapter import build_anthropic_kwargs
return build_anthropic_kwargs(
model=model,
messages=messages,
tools=tools,
max_tokens=params.get("max_tokens", 16384),
reasoning_config=params.get("reasoning_config"),
tool_choice=params.get("tool_choice"),
is_oauth=params.get("is_oauth", False),
preserve_dots=params.get("preserve_dots", False),
context_length=params.get("context_length"),
base_url=params.get("base_url"),
fast_mode=params.get("fast_mode", False),
)
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize Anthropic response to NormalizedResponse.
kwargs:
strip_tool_prefix: bool strip 'mcp_mcp_' prefixes from tool names.
"""
from agent.anthropic_adapter import normalize_anthropic_response_v2
strip_tool_prefix = kwargs.get("strip_tool_prefix", False)
return normalize_anthropic_response_v2(response, strip_tool_prefix=strip_tool_prefix)
def validate_response(self, response: Any) -> bool:
"""Check Anthropic response structure is valid."""
if response is None:
return False
content_blocks = getattr(response, "content", None)
if not isinstance(content_blocks, list):
return False
if not content_blocks:
return False
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Extract Anthropic cache_read and cache_creation token counts."""
usage = getattr(response, "usage", None)
if usage is None:
return None
cached = getattr(usage, "cache_read_input_tokens", 0) or 0
written = getattr(usage, "cache_creation_input_tokens", 0) or 0
if cached or written:
return {"cached_tokens": cached, "creation_tokens": written}
return None
# Promote the adapter's canonical mapping to module level so it's shared
_STOP_REASON_MAP = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
"model_context_window_exceeded": "length",
}
def map_finish_reason(self, raw_reason: str) -> str:
"""Map Anthropic stop_reason to OpenAI finish_reason."""
return self._STOP_REASON_MAP.get(raw_reason, "stop")
# Auto-register on import
from agent.transports import register_transport # noqa: E402
register_transport("anthropic_messages", AnthropicTransport)

89
agent/transports/base.py Normal file
View file

@ -0,0 +1,89 @@
"""Abstract base for provider transports.
A transport owns the data path for one api_mode:
convert_messages convert_tools build_kwargs normalize_response
It does NOT own: client construction, streaming, credential refresh,
prompt caching, interrupt handling, or retry logic. Those stay on AIAgent.
"""
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from agent.transports.types import NormalizedResponse
class ProviderTransport(ABC):
"""Base class for provider-specific format conversion and normalization."""
@property
@abstractmethod
def api_mode(self) -> str:
"""The api_mode string this transport handles (e.g. 'anthropic_messages')."""
...
@abstractmethod
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI-format messages to provider-native format.
Returns provider-specific structure (e.g. (system, messages) for Anthropic,
or the messages list unchanged for chat_completions).
"""
...
@abstractmethod
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI-format tool definitions to provider-native format.
Returns provider-specific tool list (e.g. Anthropic input_schema format).
"""
...
@abstractmethod
def build_kwargs(
self,
model: str,
messages: List[Dict[str, Any]],
tools: Optional[List[Dict[str, Any]]] = None,
**params,
) -> Dict[str, Any]:
"""Build the complete API call kwargs dict.
This is the primary entry point it typically calls convert_messages()
and convert_tools() internally, then adds model-specific config.
Returns a dict ready to be passed to the provider's SDK client.
"""
...
@abstractmethod
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize a raw provider response to the shared NormalizedResponse type.
This is the only method that returns a transport-layer type.
"""
...
def validate_response(self, response: Any) -> bool:
"""Optional: check if the raw response is structurally valid.
Returns True if valid, False if the response should be treated as invalid.
Default implementation always returns True.
"""
return True
def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
"""Optional: extract provider-specific cache hit/creation stats.
Returns dict with 'cached_tokens' and 'creation_tokens', or None.
Default returns None.
"""
return None
def map_finish_reason(self, raw_reason: str) -> str:
"""Optional: map provider-specific stop reason to OpenAI equivalent.
Default returns the raw reason unchanged. Override for providers
with different stop reason vocabularies.
"""
return raw_reason

100
agent/transports/types.py Normal file
View file

@ -0,0 +1,100 @@
"""Shared types for normalized provider responses.
These dataclasses define the canonical shape that all provider adapters
normalize responses to. The shared surface is intentionally minimal
only fields that every downstream consumer reads are top-level.
Protocol-specific state goes in ``provider_data`` dicts (response-level
and per-tool-call) so that protocol-aware code paths can access it
without polluting the shared type.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional
@dataclass
class ToolCall:
"""A normalized tool call from any provider.
``id`` is the protocol's canonical identifier — what gets used in
``tool_call_id`` / ``tool_use_id`` when constructing tool result
messages. May be ``None`` when the provider omits it; the agent
fills it via ``_deterministic_call_id()`` before storing in history.
``provider_data`` carries per-tool-call protocol metadata that only
protocol-aware code reads:
* Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
* Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
* Others: ``None``
"""
id: Optional[str]
name: str
arguments: str # JSON string
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
@dataclass
class Usage:
"""Token usage from an API response."""
prompt_tokens: int = 0
completion_tokens: int = 0
total_tokens: int = 0
cached_tokens: int = 0
@dataclass
class NormalizedResponse:
"""Normalized API response from any provider.
Shared fields are truly cross-provider every caller can rely on
them without branching on api_mode. Protocol-specific state goes in
``provider_data`` so that only protocol-aware code paths read it.
Response-level ``provider_data`` examples:
* Anthropic: ``{"reasoning_details": [...]}``
* Codex: ``{"codex_reasoning_items": [...]}``
* Others: ``None``
"""
content: Optional[str]
tool_calls: Optional[List[ToolCall]]
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
reasoning: Optional[str] = None
usage: Optional[Usage] = None
provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
# ---------------------------------------------------------------------------
# Factory helpers
# ---------------------------------------------------------------------------
def build_tool_call(
id: Optional[str],
name: str,
arguments: Any,
**provider_fields: Any,
) -> ToolCall:
"""Build a ``ToolCall``, auto-serialising *arguments* if it's a dict.
Any extra keyword arguments are collected into ``provider_data``.
"""
args_str = json.dumps(arguments) if isinstance(arguments, dict) else str(arguments)
pd = dict(provider_fields) if provider_fields else None
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
"""Translate a provider-specific stop reason to the normalised set.
Falls back to ``"stop"`` for unknown or ``None`` reasons.
"""
if reason is None:
return "stop"
return mapping.get(reason, "stop")

214
cli.py
View file

@ -19,6 +19,7 @@ import shutil
import sys
import json
import re
import concurrent.futures
import base64
import atexit
import tempfile
@ -65,6 +66,7 @@ from agent.usage_pricing import (
format_duration_compact,
format_token_count_compact,
)
from agent.account_usage import fetch_account_usage, render_account_usage_lines
from hermes_cli.banner import _format_context_length, format_banner_version_label
_COMMAND_SPINNER_FRAMES = ("", "", "", "", "", "", "", "", "", "")
@ -5271,6 +5273,30 @@ class HermesCLI:
except Exception:
return False
def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool:
"""Return True when /steer should be dispatched immediately while the agent is running.
/steer MUST bypass the normal _pending_input process_loop path when
the agent is active, because process_loop is blocked inside
self.chat() for the duration of the run. By the time the queued
command is pulled from _pending_input, _agent_running has already
flipped back to False, and process_command() takes the idle
fallback delivering the steer as a next-turn message instead of
injecting it mid-run. Dispatching inline on the UI thread calls
agent.steer() directly, which is thread-safe (uses _pending_steer_lock).
"""
if not text or has_images or not _looks_like_slash_command(text):
return False
if not getattr(self, "_agent_running", False):
return False
try:
from hermes_cli.commands import resolve_command
base = text.split(None, 1)[0].lower().lstrip('/')
cmd = resolve_command(base)
return bool(cmd and cmd.name == "steer")
except Exception:
return False
def _show_model_and_providers(self):
"""Show current model + provider and list all authenticated providers.
@ -7022,6 +7048,27 @@ class HermesCLI:
if cost_result.status == "unknown":
print(f" Note: Pricing unknown for {agent.model}")
# Account limits -- fetched off-thread with a hard timeout so slow
# provider APIs don't hang the prompt.
provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
account_snapshot = None
if provider:
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
try:
account_snapshot = _pool.submit(
fetch_account_usage, provider,
base_url=base_url, api_key=api_key,
).result(timeout=10.0)
except (concurrent.futures.TimeoutError, Exception):
account_snapshot = None
account_lines = [f" {line}" for line in render_account_usage_lines(account_snapshot)]
if account_lines:
print()
for line in account_lines:
print(line)
if self.verbose:
logging.getLogger().setLevel(logging.DEBUG)
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
@ -7398,11 +7445,12 @@ class HermesCLI:
self._voice_stop_and_transcribe()
# Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict)
try:
from tools.voice_mode import play_beep
play_beep(frequency=880, count=1)
except Exception:
pass
if self._voice_beeps_enabled():
try:
from tools.voice_mode import play_beep
play_beep(frequency=880, count=1)
except Exception:
pass
try:
self._voice_recorder.start(on_silence_stop=_on_silence)
@ -7450,11 +7498,12 @@ class HermesCLI:
wav_path = self._voice_recorder.stop()
# Audio cue: double beep after stream stopped (no CoreAudio conflict)
try:
from tools.voice_mode import play_beep
play_beep(frequency=660, count=2)
except Exception:
pass
if self._voice_beeps_enabled():
try:
from tools.voice_mode import play_beep
play_beep(frequency=660, count=2)
except Exception:
pass
if wav_path is None:
_cprint(f"{_DIM}No speech detected.{_RST}")
@ -7604,6 +7653,17 @@ class HermesCLI:
_cprint(f"Unknown voice subcommand: {subcommand}")
_cprint("Usage: /voice [on|off|tts|status]")
def _voice_beeps_enabled(self) -> bool:
"""Return whether CLI voice mode should play record start/stop beeps."""
try:
from hermes_cli.config import load_config
voice_cfg = load_config().get("voice", {})
if isinstance(voice_cfg, dict):
return bool(voice_cfg.get("beep_enabled", True))
except Exception:
pass
return True
def _enable_voice_mode(self):
"""Enable voice mode after checking requirements."""
if self._voice_mode:
@ -8007,8 +8067,18 @@ class HermesCLI:
choice_wrapped: list[tuple[int, str]] = []
for i, choice in enumerate(choices):
label = choice_labels.get(choice, choice)
prefix = ' ' if i == selected else ' '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
if i < 9:
num_prefix = str(i + 1)
elif i == 9:
num_prefix = '0'
else:
num_prefix = ' ' # No number for items beyond 10th
if i == selected:
prefix = f' {num_prefix}. '
else:
prefix = f' {num_prefix}. '
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Budget vertical space so HSplit never clips the command or choices.
@ -9075,6 +9145,17 @@ class HermesCLI:
event.app.current_buffer.reset(append_to_history=True)
return
# Handle /steer while the agent is running immediately on the
# UI thread. Queuing through _pending_input would deadlock the
# steer until after the agent loop finishes (process_loop is
# blocked inside self.chat()), which turns /steer into a
# post-run next-turn message — defeating mid-run injection.
# agent.steer() is thread-safe (holds _pending_steer_lock).
if self._should_handle_steer_command_inline(text, has_images=has_images):
self.process_command(text)
event.app.current_buffer.reset(append_to_history=True)
return
# Snapshot and clear attached images
images = list(self._attached_images)
self._attached_images.clear()
@ -9172,6 +9253,29 @@ class HermesCLI:
self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1)
event.app.invalidate()
# Number keys for quick clarify selection (1-9, 0 for 10th item)
def _make_clarify_number_handler(idx):
def handler(event):
if self._clarify_state and not self._clarify_freetext:
choices = self._clarify_state.get("choices") or []
# Map index to choice (treating "Other" as the last option)
if idx < len(choices):
# Select a numbered choice
self._clarify_state["response_queue"].put(choices[idx])
self._clarify_state = None
self._clarify_freetext = False
event.app.invalidate()
elif idx == len(choices):
# Select "Other" option
self._clarify_freetext = True
event.app.invalidate()
return handler
for _num in range(10):
# 1-9 select items 0-8, 0 selects item 9 (10thitem)
_idx = 9 if _num == 0 else _num - 1
kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx))
# --- Dangerous command approval: arrow-key navigation ---
@kb.add('up', filter=Condition(lambda: bool(self._approval_state)))
@ -9213,6 +9317,20 @@ class HermesCLI:
event.app.current_buffer.reset()
event.app.invalidate()
# Number keys for quick approval selection (1-9, 0 for 10th item)
def _make_approval_number_handler(idx):
def handler(event):
if self._approval_state and idx < len(self._approval_state["choices"]):
self._approval_state["selected"] = idx
self._handle_approval_selection()
event.app.invalidate()
return handler
for _num in range(10):
# 1-9 select items 0-8, 0 selects item 9 (10th item)
_idx = 9 if _num == 0 else _num - 1
kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx))
# --- History navigation: up/down browse history in normal input mode ---
# The TextArea is multiline, so by default up/down only move the cursor.
# Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
@ -9781,14 +9899,32 @@ class HermesCLI:
selected = state.get("selected", 0)
preview_lines = _wrap_panel_text(question, 60)
for i, choice in enumerate(choices):
prefix = " " if i == selected and not cli_ref._clarify_freetext else " "
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" "))
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
if i < 9:
num_prefix = str(i + 1)
elif i == 9:
num_prefix = '0'
else:
num_prefix = ' '
if i == selected and not cli_ref._clarify_freetext:
prefix = f" {num_prefix}. "
else:
prefix = f" {num_prefix}. "
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" "))
# "Other" option in preview
other_num = len(choices) + 1
if other_num < 10:
other_num_prefix = str(other_num)
elif other_num == 10:
other_num_prefix = '0'
else:
other_num_prefix = ' '
other_label = (
" Other (type below)" if cli_ref._clarify_freetext
else " Other (type your answer)" if selected == len(choices)
else " Other (type your answer)"
f" {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext
else f" {other_num_prefix}. Other (type your answer)" if selected == len(choices)
else f" {other_num_prefix}. Other (type your answer)"
)
preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" "))
preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" "))
box_width = _panel_box_width("Hermes needs your input", preview_lines)
inner_text_width = max(8, box_width - 2)
@ -9796,18 +9932,35 @@ class HermesCLI:
choice_wrapped: list[tuple[int, str]] = []
if choices:
for i, choice in enumerate(choices):
prefix = ' ' if i == selected and not cli_ref._clarify_freetext else ' '
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
if i < 9:
num_prefix = str(i + 1)
elif i == 9:
num_prefix = '0'
else:
num_prefix = ' '
if i == selected and not cli_ref._clarify_freetext:
prefix = f' {num_prefix}. '
else:
prefix = f' {num_prefix}. '
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
choice_wrapped.append((i, wrapped))
# Trailing Other row(s)
other_idx = len(choices)
if selected == other_idx and not cli_ref._clarify_freetext:
other_label_mand = ' Other (type your answer)'
elif cli_ref._clarify_freetext:
other_label_mand = ' Other (type below)'
other_num = other_idx + 1
if other_num < 10:
other_num_prefix = str(other_num)
elif other_num == 10:
other_num_prefix = '0'
else:
other_label_mand = ' Other (type your answer)'
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
other_num_prefix = ' '
if selected == other_idx and not cli_ref._clarify_freetext:
other_label_mand = f' {other_num_prefix}. Other (type your answer)'
elif cli_ref._clarify_freetext:
other_label_mand = f' {other_num_prefix}. Other (type below)'
else:
other_label_mand = f' {other_num_prefix}. Other (type your answer)'
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
elif cli_ref._clarify_freetext:
# Freetext-only mode: the guidance line takes the place of choices.
other_wrapped = _wrap_panel_text(
@ -9872,6 +10025,15 @@ class HermesCLI:
# "Other" option (trailing row(s), only shown when choices exist)
other_idx = len(choices)
# Calculate number prefix for "Other" option
other_num = other_idx + 1
if other_num < 10:
other_num_prefix = str(other_num)
elif other_num == 10:
other_num_prefix = '0'
else:
other_num_prefix = ' '
if selected == other_idx and not cli_ref._clarify_freetext:
other_style = 'class:clarify-selected'
elif cli_ref._clarify_freetext:

View file

@ -252,7 +252,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
future = asyncio.run_coroutine_threadsafe(coro, loop)
result = future.result(timeout=30)
try:
result = future.result(timeout=30)
except TimeoutError:
future.cancel()
raise
if result and not getattr(result, "success", True):
logger.warning(
"Job '%s': media send failed for %s: %s",
@ -382,7 +386,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
loop,
)
send_result = future.result(timeout=60)
try:
send_result = future.result(timeout=60)
except TimeoutError:
future.cancel()
raise
if send_result and not getattr(send_result, "success", True):
err = getattr(send_result, "error", "unknown")
logger.warning(

View file

@ -19,6 +19,8 @@ import uuid
from abc import ABC, abstractmethod
from urllib.parse import urlsplit
from utils import normalize_proxy_url
logger = logging.getLogger(__name__)
@ -159,13 +161,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
if platform_env_var:
value = (os.environ.get(platform_env_var) or "").strip()
if value:
return value
return normalize_proxy_url(value)
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
value = (os.environ.get(key) or "").strip()
if value:
return value
return _detect_macos_system_proxy()
return normalize_proxy_url(value)
return normalize_proxy_url(_detect_macos_system_proxy())
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:

View file

@ -794,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter):
# Telegram pushes updates to our HTTP endpoint. This
# enables cloud platforms (Fly.io, Railway) to auto-wake
# suspended machines on inbound HTTP traffic.
#
# SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
# python-telegram-bot passes secret_token=None and the
# webhook endpoint accepts any HTTP POST — attackers can
# inject forged updates as if from Telegram. Refuse to
# start rather than silently run in fail-open mode.
# See GHSA-3vpc-7q5r-276h.
webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
if not webhook_secret:
raise RuntimeError(
"TELEGRAM_WEBHOOK_SECRET is required when "
"TELEGRAM_WEBHOOK_URL is set. Without it, the "
"webhook endpoint accepts forged updates from "
"anyone who can reach it — see "
"https://github.com/NousResearch/hermes-agent/"
"security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
"Generate a secret and set it in your .env:\n"
" export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
"Then register it with Telegram when setting the "
"webhook via setWebhook's secret_token parameter."
)
from urllib.parse import urlparse
webhook_path = urlparse(webhook_url).path or "/telegram"
@ -2333,10 +2353,16 @@ class TelegramAdapter(BasePlatformAdapter):
DMs remain unrestricted. Group/supergroup messages are accepted when:
- the chat is explicitly allowlisted in ``free_response_chats``
- ``require_mention`` is disabled
- the message is a command
- the message replies to the bot
- the bot is @mentioned
- the text/caption matches a configured regex wake-word pattern
When ``require_mention`` is enabled, slash commands are not given
special treatment they must pass the same mention/reply checks
as any other group message. Users can still trigger commands via
the Telegram bot menu (``/command@botname``) or by explicitly
mentioning the bot (``@botname /command``), both of which are
recognised as mentions by :meth:`_message_mentions_bot`.
"""
if not self._is_group_chat(message):
return True
@ -2351,8 +2377,6 @@ class TelegramAdapter(BasePlatformAdapter):
return True
if not self._telegram_require_mention():
return True
if is_command:
return True
if self._is_reply_to_bot(message):
return True
if self._message_mentions_bot(message):

View file

@ -30,6 +30,8 @@ from pathlib import Path
from datetime import datetime
from typing import Dict, Optional, Any, List
from agent.account_usage import fetch_account_usage, render_account_usage_lines
# --- Agent cache tuning ---------------------------------------------------
# Bounds the per-session AIAgent cache to prevent unbounded growth in
# long-lived gateways (each AIAgent holds LLM clients, tool schemas,
@ -279,6 +281,7 @@ from gateway.session import (
build_session_context,
build_session_context_prompt,
build_session_key,
is_shared_multi_user_session,
)
from gateway.delivery import DeliveryRouter
from gateway.platforms.base import (
@ -3791,12 +3794,12 @@ class GatewayRunner:
history = history or []
message_text = event.text or ""
_is_shared_thread = (
source.chat_type != "dm"
and source.thread_id
and not getattr(self.config, "thread_sessions_per_user", False)
_is_shared_multi_user = is_shared_multi_user_session(
source,
group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
)
if _is_shared_thread and source.user_name:
if _is_shared_multi_user and source.user_name:
message_text = f"[{source.user_name}] {message_text}"
if event.media_urls:
@ -7263,6 +7266,38 @@ class GatewayRunner:
if cached:
agent = cached[0]
# Resolve provider/base_url/api_key for the account-usage fetch.
# Prefer the live agent; fall back to persisted billing data on the
# SessionDB row so `/usage` still returns account info between turns
# when no agent is resident.
provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
if not provider and getattr(self, "_session_db", None) is not None:
try:
_entry_for_billing = self.session_store.get_or_create_session(source)
persisted = self._session_db.get_session(_entry_for_billing.session_id) or {}
except Exception:
persisted = {}
provider = provider or persisted.get("billing_provider")
base_url = base_url or persisted.get("billing_base_url")
# Fetch account usage off the event loop so slow provider APIs don't
# block the gateway. Failures are non-fatal -- account_lines stays [].
account_lines: list[str] = []
if provider:
try:
account_snapshot = await asyncio.to_thread(
fetch_account_usage,
provider,
base_url=base_url,
api_key=api_key,
)
except Exception:
account_snapshot = None
if account_snapshot:
account_lines = render_account_usage_lines(account_snapshot, markdown=True)
if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
lines = []
@ -7320,6 +7355,10 @@ class GatewayRunner:
if ctx.compression_count:
lines.append(f"Compressions: {ctx.compression_count}")
if account_lines:
lines.append("")
lines.extend(account_lines)
return "\n".join(lines)
# No agent at all -- check session history for a rough count
@ -7329,12 +7368,18 @@ class GatewayRunner:
from agent.model_metadata import estimate_messages_tokens_rough
msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
approx = estimate_messages_tokens_rough(msgs)
return (
f"📊 **Session Info**\n"
f"Messages: {len(msgs)}\n"
f"Estimated context: ~{approx:,} tokens\n"
f"_(Detailed usage available after the first agent response)_"
)
lines = [
"📊 **Session Info**",
f"Messages: {len(msgs)}",
f"Estimated context: ~{approx:,} tokens",
"_(Detailed usage available after the first agent response)_",
]
if account_lines:
lines.append("")
lines.extend(account_lines)
return "\n".join(lines)
if account_lines:
return "\n".join(account_lines)
return "No usage data available for this session."
async def _handle_insights_command(self, event: MessageEvent) -> str:
@ -10774,6 +10819,12 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
except (ProcessLookupError, PermissionError, OSError):
pass
remove_pid_file()
# remove_pid_file() is a no-op when the PID doesn't match.
# Force-unlink to cover the old-process-crashed case.
try:
(get_hermes_home() / "gateway.pid").unlink(missing_ok=True)
except Exception:
pass
# Clean up any takeover marker the old process didn't consume
# (e.g. SIGKILL'd before its shutdown handler could read it).
try:
@ -10912,6 +10963,30 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
else:
logger.info("Skipping signal handlers (not running in main thread).")
# Claim the PID file BEFORE bringing up any platform adapters.
# This closes the --replace race window: two concurrent `gateway run
# --replace` invocations both pass the termination-wait above, but
# only the winner of the O_CREAT|O_EXCL race below will ever open
# Telegram polling, Discord gateway sockets, etc. The loser exits
# cleanly before touching any external service.
import atexit
from gateway.status import write_pid_file, remove_pid_file, get_running_pid
_current_pid = get_running_pid()
if _current_pid is not None and _current_pid != os.getpid():
logger.error(
"Another gateway instance (PID %d) started during our startup. "
"Exiting to avoid double-running.", _current_pid
)
return False
try:
write_pid_file()
except FileExistsError:
logger.error(
"PID file race lost to another gateway instance. Exiting."
)
return False
atexit.register(remove_pid_file)
# Start the gateway
success = await runner.start()
if not success:
@ -10921,12 +10996,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
return True
# Write PID file so CLI can detect gateway is running
import atexit
from gateway.status import write_pid_file, remove_pid_file
write_pid_file()
atexit.register(remove_pid_file)
# Start background cron ticker so scheduled jobs fire automatically.
# Pass the event loop so cron delivery can use live adapters (E2EE support).
cron_stop = threading.Event()

View file

@ -152,6 +152,7 @@ class SessionContext:
source: SessionSource
connected_platforms: List[Platform]
home_channels: Dict[Platform, HomeChannel]
shared_multi_user_session: bool = False
# Session metadata
session_key: str = ""
@ -166,6 +167,7 @@ class SessionContext:
"home_channels": {
p.value: hc.to_dict() for p, hc in self.home_channels.items()
},
"shared_multi_user_session": self.shared_multi_user_session,
"session_key": self.session_key,
"session_id": self.session_id,
"created_at": self.created_at.isoformat() if self.created_at else None,
@ -240,18 +242,16 @@ def build_session_context_prompt(
lines.append(f"**Channel Topic:** {context.source.chat_topic}")
# User identity.
# In shared thread sessions (non-DM with thread_id), multiple users
# contribute to the same conversation. Don't pin a single user name
# in the system prompt — it changes per-turn and would bust the prompt
# cache. Instead, note that this is a multi-user thread; individual
# sender names are prefixed on each user message by the gateway.
_is_shared_thread = (
context.source.chat_type != "dm"
and context.source.thread_id
)
if _is_shared_thread:
# In shared multi-user sessions (shared threads OR shared non-thread groups
# when group_sessions_per_user=False), multiple users contribute to the same
# conversation. Don't pin a single user name in the system prompt — it
# changes per-turn and would bust the prompt cache. Instead, note that
# this is a multi-user session; individual sender names are prefixed on
# each user message by the gateway.
if context.shared_multi_user_session:
session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
lines.append(
"**Session type:** Multi-user thread — messages are prefixed "
f"**Session type:** {session_label} — messages are prefixed "
"with [sender name]. Multiple users may participate."
)
elif context.source.user_name:
@ -467,6 +467,27 @@ class SessionEntry:
)
def is_shared_multi_user_session(
source: SessionSource,
*,
group_sessions_per_user: bool = True,
thread_sessions_per_user: bool = False,
) -> bool:
"""Return True when a non-DM session is shared across participants.
Mirrors the isolation rules in :func:`build_session_key`:
- DMs are never shared.
- Threads are shared unless ``thread_sessions_per_user`` is True.
- Non-thread group/channel sessions are shared unless
``group_sessions_per_user`` is True (default: True = isolated).
"""
if source.chat_type == "dm":
return False
if source.thread_id:
return not thread_sessions_per_user
return not group_sessions_per_user
def build_session_key(
source: SessionSource,
group_sessions_per_user: bool = True,
@ -1238,6 +1259,11 @@ def build_session_context(
source=source,
connected_platforms=connected,
home_channels=home_channels,
shared_multi_user_session=is_shared_multi_user_session(
source,
group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
),
)
if session_entry:

View file

@ -225,8 +225,28 @@ def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
def write_pid_file() -> None:
"""Write the current process PID and metadata to the gateway PID file."""
_write_json_file(_get_pid_path(), _build_pid_record())
"""Write the current process PID and metadata to the gateway PID file.
Uses atomic O_CREAT | O_EXCL creation so that concurrent --replace
invocations race: exactly one process wins and the rest get
FileExistsError.
"""
path = _get_pid_path()
path.parent.mkdir(parents=True, exist_ok=True)
record = json.dumps(_build_pid_record())
try:
fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
except FileExistsError:
raise # Let caller decide: another gateway is racing us
try:
with os.fdopen(fd, "w", encoding="utf-8") as f:
f.write(record)
except Exception:
try:
path.unlink(missing_ok=True)
except OSError:
pass
raise
def write_runtime_status(

View file

@ -152,6 +152,23 @@ def auth_add_command(args) -> None:
pool = load_pool(provider)
# Clear ALL suppressions for this provider — re-adding a credential is
# a strong signal the user wants auth re-enabled. This covers env:*
# (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
# device_code (codex), etc. One consistent re-engagement pattern.
# Matches the Codex device_code re-link pattern that predates this.
if not provider.startswith(CUSTOM_POOL_PREFIX):
try:
from hermes_cli.auth import (
_load_auth_store,
unsuppress_credential_source,
)
suppressed = _load_auth_store().get("suppressed_sources", {})
for src in list(suppressed.get(provider, []) or []):
unsuppress_credential_source(provider, src)
except Exception:
pass
if requested_type == AUTH_TYPE_API_KEY:
token = (getattr(args, "api_key", None) or "").strip()
if not token:
@ -338,71 +355,28 @@ def auth_remove_command(args) -> None:
raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
print(f"Removed {provider} credential #{index} ({removed.label})")
# If this was an env-seeded credential, also clear the env var from .env
# so it doesn't get re-seeded on the next load_pool() call.
if removed.source.startswith("env:"):
env_var = removed.source[len("env:"):]
if env_var:
from hermes_cli.config import remove_env_value
cleared = remove_env_value(env_var)
if cleared:
print(f"Cleared {env_var} from .env")
# Unified removal dispatch. Every credential source Hermes reads from
# (env vars, external OAuth files, auth.json blocks, custom config)
# has a RemovalStep registered in agent.credential_sources. The step
# handles its source-specific cleanup and we centralise suppression +
# user-facing output here so every source behaves identically from
# the user's perspective.
from agent.credential_sources import find_removal_step
from hermes_cli.auth import suppress_credential_source
# If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
# clear the underlying auth store / credential file so it doesn't get
# re-seeded on the next load_pool() call.
elif provider == "openai-codex" and (
removed.source == "device_code" or removed.source.endswith(":device_code")
):
# Codex tokens live in TWO places: the Hermes auth store and
# ~/.codex/auth.json (the Codex CLI shared file). On every refresh,
# refresh_codex_oauth_pure() writes to both. So clearing only the
# Hermes auth store is not enough — _seed_from_singletons() will
# auto-import from ~/.codex/auth.json on the next load_pool() and
# the removal is instantly undone. Mark the source as suppressed
# so auto-import is skipped; leave ~/.codex/auth.json untouched so
# the Codex CLI itself keeps working.
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
suppress_credential_source,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
suppress_credential_source(provider, "device_code")
print("Suppressed openai-codex device_code source — it will not be re-seeded.")
print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
print("Run `hermes auth add openai-codex` to re-enable if needed.")
step = find_removal_step(provider, removed.source)
if step is None:
# Unregistered source — e.g. "manual", which has nothing external
# to clean up. The pool entry is already gone; we're done.
return
elif removed.source == "device_code" and provider == "nous":
from hermes_cli.auth import (
_load_auth_store, _save_auth_store, _auth_store_lock,
)
with _auth_store_lock():
auth_store = _load_auth_store()
providers_dict = auth_store.get("providers")
if isinstance(providers_dict, dict) and provider in providers_dict:
del providers_dict[provider]
_save_auth_store(auth_store)
print(f"Cleared {provider} OAuth tokens from auth store")
elif removed.source == "hermes_pkce" and provider == "anthropic":
from hermes_constants import get_hermes_home
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
if oauth_file.exists():
oauth_file.unlink()
print("Cleared Hermes Anthropic OAuth credentials")
elif removed.source == "claude_code" and provider == "anthropic":
from hermes_cli.auth import suppress_credential_source
suppress_credential_source(provider, "claude_code")
print("Suppressed claude_code credential — it will not be re-seeded.")
print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
print("Run `hermes auth add anthropic` to re-enable if needed.")
result = step.remove_fn(provider, removed)
for line in result.cleaned:
print(line)
if result.suppress:
suppress_credential_source(provider, removed.source)
for line in result.hints:
print(line)
def auth_reset_command(args) -> None:

View file

@ -738,6 +738,26 @@ DEFAULT_CONFIG: _DefaultConfig = {
# (terminal and execute_code). Skill-declared required_environment_variables
# are passed through automatically; this list is for non-skill use cases.
"env_passthrough": [],
# Extra files to source in the login shell when building the
# per-session environment snapshot. Use this when tools like nvm,
# pyenv, asdf, or custom PATH entries are registered by files that
# a bash login shell would skip — most commonly ``~/.bashrc``
# (bash doesn't source bashrc in non-interactive login mode) or
# zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
# Paths support ``~`` / ``${VAR}``. Missing files are silently
# skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
# snapshot shell is bash (this is the ``auto_source_bashrc``
# behaviour — disable with that key if you want strict login-only
# semantics).
"shell_init_files": [],
# When true (default), Hermes sources ``~/.bashrc`` in the login
# shell used to build the environment snapshot. This captures
# PATH additions, shell functions, and aliases defined in the
# user's bashrc — which a plain ``bash -l -c`` would otherwise
# miss because bash skips bashrc in non-interactive login mode.
# Turn this off if you have a bashrc that misbehaves when sourced
# non-interactively (e.g. one that hard-exits on TTY checks).
"auto_source_bashrc": True,
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
"docker_forward_env": [],
# Explicit environment variables to set inside Docker containers.
@ -996,6 +1016,7 @@ DEFAULT_CONFIG: _DefaultConfig = {
"record_key": "ctrl+b",
"max_recording_seconds": 120,
"auto_tts": False,
"beep_enabled": True, # Play record start/stop beeps in CLI voice mode
"silence_threshold": 200, # RMS below this = silence (0-32767)
"silence_duration": 3.0, # Seconds of silence before auto-stop
},
@ -1054,6 +1075,20 @@ DEFAULT_CONFIG: _DefaultConfig = {
# always goes to ~/.hermes/skills/.
"skills": {
"external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"]
# Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
# content with the absolute skill directory and the active session id
# before the agent sees it. Lets skill authors reference bundled
# scripts without the agent having to join paths.
"template_vars": True,
# Pre-execute inline shell snippets written as !`cmd` in SKILL.md
# body. Their stdout is inlined into the skill message before the
# agent reads it, so skills can inject dynamic context (dates, git
# state, detected tool versions, …). Off by default because any
# content from the skill author runs on the host without approval;
# only enable for skill sources you trust.
"inline_shell": False,
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
"inline_shell_timeout": 10,
},
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
@ -1200,7 +1235,7 @@ DEFAULT_CONFIG: _DefaultConfig = {
},
# Config schema version - bump this when adding new required fields
"_config_version": 21,
"_config_version": 22,
}
# =============================================================================

View file

@ -1327,7 +1327,6 @@ def cmd_whatsapp(args):
except (EOFError, KeyboardInterrupt):
response = "n"
if response.lower() in ("y", "yes"):
shutil.rmtree(session_dir, ignore_errors=True)
session_dir.mkdir(parents=True, exist_ok=True)
print(" ✓ Session cleared")
@ -5213,7 +5212,9 @@ def _install_hangup_protection(gateway_mode: bool = False):
# (2) Mirror output to update.log and wrap stdio for broken-pipe
# tolerance. Any failure here is non-fatal; we just skip the wrap.
try:
logs_dir = get_hermes_home() / "logs"
from hermes_cli.config import get_hermes_home as _get_hermes_home
logs_dir = _get_hermes_home() / "logs"
logs_dir.mkdir(parents=True, exist_ok=True)
log_path = logs_dir / "update.log"
log_file = open(log_path, "a", buffering=1, encoding="utf-8")

View file

@ -292,6 +292,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"big-pickle",
],
"opencode-go": [
"kimi-k2.6",
"kimi-k2.5",
"glm-5.1",
"glm-5",
@ -299,6 +300,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
"mimo-v2-omni",
"minimax-m2.7",
"minimax-m2.5",
"qwen3.6-plus",
"qwen3.5-plus",
],
"kilocode": [
"anthropic/claude-opus-4.6",
@ -685,6 +688,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool:
return False
def _openrouter_model_supports_tools(item: Any) -> bool:
"""Return True when the model's ``supported_parameters`` advertise tool calling.
hermes-agent is tool-calling-first every provider path assumes the model
can invoke tools. Models that don't advertise ``tools`` in their
``supported_parameters`` (e.g. image-only or completion-only models) cannot
be driven by the agent loop and would fail at the first tool call.
**Permissive when the field is missing.** Some OpenRouter-compatible gateways
(Nous Portal, private mirrors, older catalog snapshots) don't populate
``supported_parameters`` at all. Treat that as "unknown capability → allow"
so the picker doesn't silently empty for those users. Only hide models
whose ``supported_parameters`` is an explicit list that omits ``tools``.
Ported from Kilo-Org/kilocode#9068.
"""
if not isinstance(item, dict):
return True
params = item.get("supported_parameters")
if not isinstance(params, list):
# Field absent / malformed / None — be permissive.
return True
return "tools" in params
def fetch_openrouter_models(
timeout: float = 8.0,
*,
@ -727,6 +755,11 @@ def fetch_openrouter_models(
live_item = live_by_id.get(preferred_id)
if live_item is None:
continue
# Hide models that don't advertise tool-calling support — hermes-agent
# requires it and surfacing them leads to immediate runtime failures
# when the user selects them. Ported from Kilo-Org/kilocode#9068.
if not _openrouter_model_supports_tools(live_item):
continue
desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
curated.append((preferred_id, desc))
@ -2393,13 +2426,70 @@ def validate_requested_model(
except Exception:
pass # Fall through to generic warning
# Static-catalog fallback: when the /models probe was unreachable,
# validate against the curated list from provider_model_ids() — same
# pattern as the openai-codex and minimax branches above. This fixes
# /model switches in the gateway for providers like opencode-go and
# opencode-zen whose /models endpoint returns 404 against the HTML
# marketing site. Without this block, validate_requested_model would
# reject every model on such providers, switch_model() would return
# success=False, and the gateway would never write to
# _session_model_overrides.
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
try:
catalog_models = provider_model_ids(normalized)
except Exception:
catalog_models = []
if catalog_models:
catalog_lower = {m.lower(): m for m in catalog_models}
if requested_for_lookup.lower() in catalog_lower:
return {
"accepted": True,
"persist": True,
"recognized": True,
"message": None,
}
catalog_lower_list = list(catalog_lower.keys())
auto = get_close_matches(
requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9
)
if auto:
corrected = catalog_lower[auto[0]]
return {
"accepted": True,
"persist": True,
"recognized": True,
"corrected_model": corrected,
"message": f"Auto-corrected `{requested}` → `{corrected}`",
}
suggestions = get_close_matches(
requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5
)
suggestion_text = ""
if suggestions:
suggestion_text = "\n Similar models: " + ", ".join(
f"`{catalog_lower[s]}`" for s in suggestions
)
return {
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Note: `{requested}` was not found in the {provider_label} curated catalog "
f"and the /models endpoint was unreachable.{suggestion_text}"
f"\n The model may still work if it exists on the provider."
),
}
# No catalog available — accept with a warning, matching the comment's
# stated intent ("Accept and persist, but warn").
return {
"accepted": False,
"persist": False,
"accepted": True,
"persist": True,
"recognized": False,
"message": (
f"Could not reach the {provider_label} API to validate `{requested}`. "
f"Note: could not reach the {provider_label} API to validate `{requested}`. "
f"If the service isn't down, this model may not be valid."
),
}

View file

@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status
from hermes_cli.config import get_env_value, load_config
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
from tools.tool_backend_helpers import (
fal_key_is_configured,
has_direct_modal_credentials,
managed_nous_tools_enabled,
normalize_browser_cloud_provider,
@ -271,7 +272,7 @@ def get_nous_subscription_features(
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
direct_fal = bool(get_env_value("FAL_KEY"))
direct_fal = fal_key_is_configured()
direct_openai_tts = bool(resolve_openai_audio_api_key())
direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
direct_camofox = bool(get_env_value("CAMOFOX_URL"))
@ -520,7 +521,7 @@ def apply_nous_managed_defaults(
browser_cfg["cloud_provider"] = "browser-use"
changed.add("browser")
if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
if "image_gen" in selected_toolsets and not fal_key_is_configured():
changed.add("image_gen")
return changed
@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
or get_env_value("TAVILY_API_KEY")
or get_env_value("EXA_API_KEY")
),
"image_gen": bool(get_env_value("FAL_KEY")),
"image_gen": fal_key_is_configured(),
"tts": bool(
resolve_openai_audio_api_key()
or get_env_value("ELEVENLABS_API_KEY")

View file

@ -492,8 +492,12 @@ def _resolve_openrouter_runtime(
else:
# Custom endpoint: use api_key from config when using config base_url (#1760).
# When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
# the canonical env var for ollama.com authentication.
_is_ollama_url = "ollama.com" in base_url.lower()
# the canonical env var for ollama.com authentication. Match on
# HOST, not substring — a custom base_url whose path contains
# "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
# hostname is a look-alike (ollama.com.attacker.test) must not
# receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
api_key_candidates = [
explicit_api_key,
(cfg_api_key if use_config_base_url else ""),

View file

@ -102,7 +102,7 @@ _DEFAULT_PROVIDER_MODELS = {
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
"opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
"huggingface": [
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
@ -441,6 +441,16 @@ def _print_setup_summary(config: dict, hermes_home):
tool_status.append(("Text-to-Speech (NeuTTS local)", True, None))
else:
tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'"))
elif tts_provider == "kittentts":
try:
import importlib.util
kittentts_ok = importlib.util.find_spec("kittentts") is not None
except Exception:
kittentts_ok = False
if kittentts_ok:
tool_status.append(("Text-to-Speech (KittenTTS local)", True, None))
else:
tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'"))
else:
tool_status.append(("Text-to-Speech (Edge TTS)", True, None))
@ -901,6 +911,31 @@ def _install_neutts_deps() -> bool:
return False
def _install_kittentts_deps() -> bool:
"""Install KittenTTS dependencies with user approval. Returns True on success."""
import subprocess
import sys
wheel_url = (
"https://github.com/KittenML/KittenTTS/releases/download/"
"0.8.1/kittentts-0.8.1-py3-none-any.whl"
)
print()
print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...")
print()
try:
subprocess.run(
[sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
check=True, timeout=300,
)
print_success("kittentts installed successfully")
return True
except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
print_error(f"Failed to install kittentts: {e}")
print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile")
return False
def _setup_tts_provider(config: dict):
"""Interactive TTS provider selection with install flow for NeuTTS."""
tts_config = config.get("tts", {})
@ -916,6 +951,7 @@ def _setup_tts_provider(config: dict):
"mistral": "Mistral Voxtral TTS",
"gemini": "Google Gemini TTS",
"neutts": "NeuTTS",
"kittentts": "KittenTTS",
}
current_label = provider_labels.get(current_provider, current_provider)
@ -939,9 +975,10 @@ def _setup_tts_provider(config: dict):
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
"Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
"NeuTTS (local on-device, free, ~300MB model download)",
"KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)",
]
)
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"])
choices.append(f"Keep current ({current_label})")
keep_current_idx = len(choices) - 1
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
@ -1060,6 +1097,29 @@ def _setup_tts_provider(config: dict):
print_warning("No API key provided. Falling back to Edge TTS.")
selected = "edge"
elif selected == "kittentts":
# Check if already installed
try:
import importlib.util
already_installed = importlib.util.find_spec("kittentts") is not None
except Exception:
already_installed = False
if already_installed:
print_success("KittenTTS is already installed")
else:
print()
print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).")
print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
print()
if prompt_yes_no("Install KittenTTS now?", True):
if not _install_kittentts_deps():
print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.")
selected = "edge"
else:
print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.")
selected = "edge"
# Save the selection
if "tts" not in config:
config["tts"] = {}

View file

@ -24,7 +24,7 @@ from hermes_cli.nous_subscription import (
apply_nous_managed_defaults,
get_nous_subscription_features,
)
from tools.tool_backend_helpers import managed_nous_tools_enabled
from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
from utils import base_url_hostname
logger = logging.getLogger(__name__)
@ -182,6 +182,14 @@ TOOL_CATEGORIES = {
],
"tts_provider": "gemini",
},
{
"name": "KittenTTS",
"badge": "local · free",
"tag": "Lightweight local ONNX TTS (~25MB), no API key",
"env_vars": [],
"tts_provider": "kittentts",
"post_setup": "kittentts",
},
],
},
"web": {
@ -423,6 +431,36 @@ def _run_post_setup(post_setup_key: str):
_print_warning(" Node.js not found. Install Camofox via Docker:")
_print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
elif post_setup_key == "kittentts":
try:
__import__("kittentts")
_print_success(" kittentts is already installed")
return
except ImportError:
pass
import subprocess
_print_info(" Installing kittentts (~25-80MB model, CPU-only)...")
wheel_url = (
"https://github.com/KittenML/KittenTTS/releases/download/"
"0.8.1/kittentts-0.8.1-py3-none-any.whl"
)
try:
result = subprocess.run(
[sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
capture_output=True, text=True, timeout=300,
)
if result.returncode == 0:
_print_success(" kittentts installed")
_print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
_print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)")
else:
_print_warning(" kittentts install failed:")
_print_info(f" {result.stderr.strip()[:300]}")
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
except subprocess.TimeoutExpired:
_print_warning(" kittentts install timed out (>5min)")
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
elif post_setup_key == "rl_training":
try:
__import__("tinker_atropos")
@ -833,7 +871,7 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
browser_cfg = config.get("browser", {})
return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
if ts_key == "image_gen":
return not get_env_value("FAL_KEY")
return not fal_key_is_configured()
return not _toolset_has_keys(ts_key, config)

View file

@ -114,6 +114,91 @@ def _require_token(request: Request) -> None:
raise HTTPException(status_code=401, detail="Unauthorized")
# Accepted Host header values for loopback binds. DNS rebinding attacks
# point a victim browser at an attacker-controlled hostname (evil.test)
# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin
# checks because the browser now considers evil.test and our dashboard
# "same origin". Validating the Host header at the app layer rejects any
# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7.
_LOOPBACK_HOST_VALUES: frozenset = frozenset({
"localhost", "127.0.0.1", "::1",
})
def _is_accepted_host(host_header: str, bound_host: str) -> bool:
"""True if the Host header targets the interface we bound to.
Accepts:
- Exact bound host (with or without port suffix)
- Loopback aliases when bound to loopback
- Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback,
no protection possible at this layer)
"""
if not host_header:
return False
# Strip port suffix. IPv6 addresses use bracket notation:
# [::1] — no port
# [::1]:9119 — with port
# Plain hosts/v4:
# localhost:9119
# 127.0.0.1:9119
h = host_header.strip()
if h.startswith("["):
# IPv6 bracketed — port (if any) follows "]:"
close = h.find("]")
if close != -1:
host_only = h[1:close] # strip brackets
else:
host_only = h.strip("[]")
else:
host_only = h.rsplit(":", 1)[0] if ":" in h else h
host_only = host_only.lower()
# 0.0.0.0 bind means operator explicitly opted into all-interfaces
# (requires --insecure per web_server.start_server). No Host-layer
# defence can protect that mode; rely on operator network controls.
if bound_host in ("0.0.0.0", "::"):
return True
# Loopback bind: accept the loopback names
bound_lc = bound_host.lower()
if bound_lc in _LOOPBACK_HOST_VALUES:
return host_only in _LOOPBACK_HOST_VALUES
# Explicit non-loopback bind: require exact host match
return host_only == bound_lc
@app.middleware("http")
async def host_header_middleware(request: Request, call_next):
"""Reject requests whose Host header doesn't match the bound interface.
Defends against DNS rebinding: a victim browser on a localhost
dashboard is tricked into fetching from an attacker hostname that
TTL-flips to 127.0.0.1. CORS and same-origin checks don't help —
the browser now treats the attacker origin as same-origin with the
dashboard. Host-header validation at the app layer catches it.
See GHSA-ppp5-vxwm-4cf7.
"""
# Store the bound host on app.state so this middleware can read it —
# set by start_server() at listen time.
bound_host = getattr(app.state, "bound_host", None)
if bound_host:
host_header = request.headers.get("host", "")
if not _is_accepted_host(host_header, bound_host):
return JSONResponse(
status_code=400,
content={
"detail": (
"Invalid Host header. Dashboard requests must use "
"the hostname the server was bound to."
),
},
)
return await call_next(request)
@app.middleware("http")
async def auth_middleware(request: Request, call_next):
"""Require the session token on all /api/ routes except the public list."""
@ -2323,6 +2408,10 @@ def start_server(
"authentication. Only use on trusted networks.", host,
)
# Record the bound host so host_header_middleware can validate incoming
# Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
app.state.bound_host = host
if open_browser:
import webbrowser

View file

@ -0,0 +1,3 @@
# Dogfood — Advanced QA & Testing Skills
Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses.

View file

@ -0,0 +1,190 @@
---
name: adversarial-ux-test
description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only.
version: 1.0.0
author: Omni @ Comelse
license: MIT
metadata:
hermes:
tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing]
related_skills: [dogfood]
---
# Adversarial UX Test
Roleplay the worst-case user for your product — the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise.
Think of it as an automated "mom test" — but angry.
## Why This Works
Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. The adversarial persona catches:
- Confusing terminology that makes sense to developers but not users
- Too many steps to accomplish basic tasks
- Missing onboarding or "aha moments"
- Accessibility issues (font size, contrast, click targets)
- Cold-start problems (empty states, no demo content)
- Paywall/signup friction that kills conversion
The **pragmatism filter** (Phase 3) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs.
## How to Use
Tell the agent:
```
"Run an adversarial UX test on [URL]"
"Be a grumpy [persona type] and test [app name]"
"Do an asshole user test on my staging site"
```
You can provide a persona or let the agent generate one based on your product's target audience.
## Step 1: Define the Persona
If no persona is provided, generate one by answering:
1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way")
2. **What is their tech comfort level?** (the lower the better — WhatsApp-only, paper notebooks, wife set up their email)
3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list)
4. **What would make them give up?** (too many clicks, jargon, slow, confusing)
5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing)
### Good Persona Example
> **"Big Mick" McAllister** — 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords.
### Bad Persona Example
> "A user who doesn't like the app" — too vague, no constraints, no voice.
The persona must be **specific enough to stay in character** for 20 minutes of testing.
## Step 2: Become the Asshole (Browse as the Persona)
1. Read any available project docs for app context and URLs
2. **Fully inhabit the persona** — their frustrations, limitations, goals
3. Navigate to the app using browser tools
4. **Attempt the persona's ACTUAL TASKS** (not a feature tour):
- Can they do what they came to do?
- How many clicks/screens to accomplish it?
- What confuses them?
- What makes them angry?
- Where do they get lost?
- What would make them give up and go back to their old way?
5. Test these friction categories:
- **First impression** — would they even bother past the landing page?
- **Core workflow** — the ONE thing they need to do most often
- **Error recovery** — what happens when they do something wrong?
- **Readability** — text size, contrast, information density
- **Speed** — does it feel faster than their current method?
- **Terminology** — any jargon they wouldn't understand?
- **Navigation** — can they find their way back? do they know where they are?
6. Take screenshots of every pain point
7. Check browser console for JS errors on every page
## Step 3: The Rant (Write Feedback in Character)
Write the feedback AS THE PERSONA — in their voice, with their frustrations. This is not a bug report. This is a real human venting.
```
[PERSONA NAME]'s Review of [PRODUCT]
Overall: [Would they keep using it? Yes/No/Maybe with conditions]
THE GOOD (grudging admission):
- [things even they have to admit work]
THE BAD (legitimate UX issues):
- [real problems that would stop them from using the product]
THE UGLY (showstoppers):
- [things that would make them uninstall/cancel immediately]
SPECIFIC COMPLAINTS:
1. [Page/feature]: "[quote in persona voice]" — [what happened, expected]
2. ...
VERDICT: "[one-line persona quote summarizing their experience]"
```
## Step 4: The Pragmatism Filter (Critical — Do Not Skip)
Step OUT of the persona. Evaluate each complaint as a product person:
- **RED: REAL UX BUG** — Any user would have this problem, not just grumpy ones. Fix it.
- **YELLOW: VALID BUT LOW PRIORITY** — Real issue but only for extreme users. Note it.
- **WHITE: PERSONA NOISE** — "I hate computers" talking, not a product problem. Skip it.
- **GREEN: FEATURE REQUEST** — Good idea hidden in the complaint. Consider it.
### Filter Criteria
1. Would a 35-year-old competent-but-busy user have the same complaint? → RED
2. Is this a genuine accessibility issue (font size, contrast, click targets)? → RED
3. Is this "I want it to work like paper" resistance to digital? → WHITE
4. Is this a real workflow inefficiency the persona stumbled on? → YELLOW or RED
5. Would fixing this add complexity for the 80% who are fine? → WHITE
6. Does the complaint reveal a missing onboarding moment? → GREEN
**This filter is MANDATORY.** Never ship raw persona complaints as tickets.
## Step 5: Create Tickets
For **RED** and **GREEN** items only:
- Clear, actionable title
- Include the persona's verbatim quote (entertaining + memorable)
- The real UX issue underneath (objective)
- A suggested fix (actionable)
- Tag/label: "ux-review"
For **YELLOW** items: one catch-all ticket with all notes.
**WHITE** items appear in the report only. No tickets.
**Max 10 tickets per session** — focus on the worst issues.
## Step 6: Report
Deliver:
1. The persona rant (Step 3) — entertaining and visceral
2. The filtered assessment (Step 4) — pragmatic and actionable
3. Tickets created (Step 5) — with links
4. Screenshots of key issues
## Tips
- **One persona per session.** Don't mix perspectives.
- **Stay in character during Steps 2-3.** Break character only at Step 4.
- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages.
- **Empty states are gold.** New user experience reveals the most friction.
- **The best findings are RED items the persona found accidentally** while trying to do something else.
- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways.
- **Run this before demos, launches, or after shipping a batch of features.**
- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts — the cold start experience is where most friction lives.
- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona.
- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding — it means the team knew about it but never felt the user's pain.
- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage.
- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level.
## Example Personas by Industry
These are starting points — customize for your specific product:
| Product Type | Persona | Age | Key Trait |
|-------------|---------|-----|-----------|
| CRM | Retirement home director | 68 | Filing cabinet is the current CRM |
| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper |
| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups |
| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes |
| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions |
| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls |
| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer |
| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders |
## Rules
- Stay in character during Steps 2-3
- Be genuinely mean but fair — find real problems, not manufactured ones
- The pragmatism filter (Step 4) is **MANDATORY**
- Screenshots required for every complaint
- Max 10 tickets per session
- Test on staging/deployed app, not local dev
- One persona, one session, one report

View file

@ -1,36 +0,0 @@
# NOTE: This file is maintained for convenience only.
# The canonical dependency list is in pyproject.toml.
# Preferred install: pip install -e ".[all]"
# Core dependencies
openai
python-dotenv
fire
httpx
rich
tenacity
prompt_toolkit
pyyaml
requests
jinja2
pydantic>=2.0
PyJWT[crypto]
debugpy
# Web tools
firecrawl-py
parallel-web>=0.4.2
# Image generation
fal-client
# Text-to-speech (Edge TTS is free, no API key needed)
edge-tts
# Optional: For cron expression parsing (cronjob scheduling)
croniter
# Optional: For messaging platform integrations (gateway)
python-telegram-bot[webhooks]>=22.6
discord.py>=2.0
aiohttp>=3.9.0

View file

@ -127,7 +127,7 @@ from agent.trajectory import (
convert_scratchpad_to_think, has_incomplete_scratchpad,
save_trajectory as _save_trajectory_to_file,
)
from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled
from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url
@ -190,7 +190,7 @@ def _get_proxy_from_env() -> Optional[str]:
"https_proxy", "http_proxy", "all_proxy"):
value = os.environ.get(key, "").strip()
if value:
return value
return normalize_proxy_url(value)
return None
@ -2358,6 +2358,13 @@ class AIAgent:
cost reduction as direct Anthropic callers, provided their
gateway implements the Anthropic cache_control contract
(MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do).
Qwen / Alibaba-family models on OpenCode, OpenCode Go, and direct
Alibaba (DashScope) also honour Anthropic-style ``cache_control``
markers on OpenAI-wire chat completions. Upstream pi-mono #3392 /
pi #3393 documented this for opencode-go Qwen. Without markers
these providers serve zero cache hits, re-billing the full prompt
on every turn.
"""
eff_provider = (provider if provider is not None else self.provider) or ""
eff_base_url = base_url if base_url is not None else (self.base_url or "")
@ -2365,7 +2372,9 @@ class AIAgent:
eff_model = (model if model is not None else self.model) or ""
base_lower = eff_base_url.lower()
is_claude = "claude" in eff_model.lower()
model_lower = eff_model.lower()
provider_lower = eff_provider.lower()
is_claude = "claude" in model_lower
is_openrouter = base_url_host_matches(eff_base_url, "openrouter.ai")
is_anthropic_wire = eff_api_mode == "anthropic_messages"
is_native_anthropic = (
@ -2380,6 +2389,22 @@ class AIAgent:
if is_anthropic_wire and is_claude:
# Third-party Anthropic-compatible gateway.
return True, True
# Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire
# transport that accepts Anthropic-style cache_control markers and
# rewards them with real cache hits. Without this branch
# qwen3.6-plus on opencode-go reports 0% cached tokens and burns
# through the subscription on every turn.
model_is_qwen = "qwen" in model_lower
provider_is_alibaba_family = provider_lower in {
"opencode", "opencode-zen", "opencode-go", "alibaba",
}
if provider_is_alibaba_family and model_is_qwen:
# Envelope layout (native_anthropic=False): markers on inner
# content parts, not top-level tool messages. Matches
# pi-mono's "alibaba" cacheControlFormat.
return True, False
return False, False
@staticmethod
@ -6126,8 +6151,9 @@ class AIAgent:
fb_base_url_hint = (fb.get("base_url") or "").strip() or None
fb_api_key_hint = (fb.get("api_key") or "").strip() or None
# For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env
# when no explicit key is in the fallback config.
if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint:
# when no explicit key is in the fallback config. Host match
# (not substring) — see GHSA-76xc-57q6-vm5m.
if fb_base_url_hint and base_url_host_matches(fb_base_url_hint, "ollama.com") and not fb_api_key_hint:
fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None
fb_client, _resolved_fb_model = resolve_provider_client(
fb_provider, model=fb_model, raw_codex=True,
@ -6548,6 +6574,15 @@ class AIAgent:
return suffix
return "[A multimodal message was converted to text for Anthropic compatibility.]"
def _get_anthropic_transport(self):
"""Return the cached AnthropicTransport instance (lazy singleton)."""
t = getattr(self, "_anthropic_transport", None)
if t is None:
from agent.transports import get_transport
t = get_transport("anthropic_messages")
self._anthropic_transport = t
return t
def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list:
if not any(
isinstance(msg, dict) and self._content_has_image_parts(msg.get("content"))
@ -6664,20 +6699,14 @@ class AIAgent:
def _build_api_kwargs(self, api_messages: list) -> dict:
"""Build the keyword arguments dict for the active API mode."""
if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_kwargs
_transport = self._get_anthropic_transport()
anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
# Pass context_length (total input+output window) so the adapter can
# clamp max_tokens (output cap) when the user configured a smaller
# context window than the model's native output limit.
ctx_len = getattr(self, "context_compressor", None)
ctx_len = ctx_len.context_length if ctx_len else None
# _ephemeral_max_output_tokens is set for one call when the API
# returns "max_tokens too large given prompt" — it caps output to
# the available window space without touching context_length.
ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
if ephemeral_out is not None:
self._ephemeral_max_output_tokens = None # consume immediately
return build_anthropic_kwargs(
return _transport.build_kwargs(
model=self.model,
messages=anthropic_messages,
tools=self.tools,
@ -6909,6 +6938,34 @@ class AIAgent:
# (the documented max output for qwen3-coder models) so the
# model has adequate output budget for tool calls.
api_kwargs.update(self._max_tokens_param(65536))
elif (
base_url_host_matches(self.base_url, "api.kimi.com")
or base_url_host_matches(self.base_url, "moonshot.ai")
or base_url_host_matches(self.base_url, "moonshot.cn")
):
# Kimi/Moonshot defaults to a low max_tokens when omitted.
# Reasoning tokens share the output budget — without an explicit
# value the model can exhaust it on thinking alone, causing
# "Response truncated due to output length limit". 32000 matches
# Kimi CLI's default (see MoonshotAI/kimi-cli kimi.py generate()).
api_kwargs.update(self._max_tokens_param(32000))
# Kimi requires reasoning_effort as a top-level chat completions
# parameter (not inside extra_body). Mirror Kimi CLI's
# with_generation_kwargs(reasoning_effort=...) / with_thinking():
# when thinking is disabled, Kimi CLI omits reasoning_effort
# entirely (maps to None).
_kimi_thinking_off = bool(
self.reasoning_config
and isinstance(self.reasoning_config, dict)
and self.reasoning_config.get("enabled") is False
)
if not _kimi_thinking_off:
_kimi_effort = "medium"
if self.reasoning_config and isinstance(self.reasoning_config, dict):
_e = (self.reasoning_config.get("effort") or "").strip().lower()
if _e in ("low", "medium", "high"):
_kimi_effort = _e
api_kwargs["reasoning_effort"] = _kimi_effort
elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
# OpenRouter and Nous Portal translate requests to Anthropic's
# Messages API, which requires max_tokens as a mandatory field.
@ -6940,6 +6997,24 @@ class AIAgent:
extra_body["provider"] = provider_preferences
_is_nous = "nousresearch" in self._base_url_lower
# Kimi/Moonshot API uses extra_body.thinking (separate from the
# top-level reasoning_effort) to enable/disable reasoning mode.
# Mirror Kimi CLI's with_thinking() behavior exactly — see
# MoonshotAI/kimi-cli packages/kosong/src/kosong/chat_provider/kimi.py
_is_kimi = (
base_url_host_matches(self.base_url, "api.kimi.com")
or base_url_host_matches(self.base_url, "moonshot.ai")
or base_url_host_matches(self.base_url, "moonshot.cn")
)
if _is_kimi:
_kimi_thinking_enabled = True
if self.reasoning_config and isinstance(self.reasoning_config, dict):
if self.reasoning_config.get("enabled") is False:
_kimi_thinking_enabled = False
extra_body["thinking"] = {
"type": "enabled" if _kimi_thinking_enabled else "disabled",
}
if self._supports_reasoning_extra_body():
if _is_github_models:
github_reasoning = self._github_models_reasoning_extra_body()
@ -7362,9 +7437,9 @@ class AIAgent:
codex_kwargs["max_output_tokens"] = 5120
response = self._run_codex_stream(codex_kwargs)
elif not _aux_available and self.api_mode == "anthropic_messages":
# Native Anthropic — use the Anthropic client directly
from agent.anthropic_adapter import build_anthropic_kwargs as _build_ant_kwargs
ant_kwargs = _build_ant_kwargs(
# Native Anthropic — use the transport for kwargs
_tflush = self._get_anthropic_transport()
ant_kwargs = _tflush.build_kwargs(
model=self.model, messages=api_messages,
tools=[memory_tool_def], max_tokens=5120,
reasoning_config=None,
@ -7392,10 +7467,15 @@ class AIAgent:
if assistant_msg and assistant_msg.tool_calls:
tool_calls = assistant_msg.tool_calls
elif self.api_mode == "anthropic_messages" and not _aux_available:
from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush
_flush_msg, _ = _nar_flush(response, strip_tool_prefix=self._is_anthropic_oauth)
if _flush_msg and _flush_msg.tool_calls:
tool_calls = _flush_msg.tool_calls
_tfn = self._get_anthropic_transport()
_flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
if _flush_nr and _flush_nr.tool_calls:
tool_calls = [
SimpleNamespace(
id=tc.id, type="function",
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
) for tc in _flush_nr.tool_calls
]
elif hasattr(response, "choices") and response.choices:
assistant_message = response.choices[0].message
if assistant_message.tool_calls:
@ -8455,14 +8535,14 @@ class AIAgent:
summary_kwargs["extra_body"] = summary_extra_body
if self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar
_ant_kw = _bak(model=self.model, messages=api_messages, tools=None,
_tsum = self._get_anthropic_transport()
_ant_kw = _tsum.build_kwargs(model=self.model, messages=api_messages, tools=None,
max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
is_oauth=self._is_anthropic_oauth,
preserve_dots=self._anthropic_preserve_dots())
summary_response = self._anthropic_messages_create(_ant_kw)
_msg, _ = _nar(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
final_response = (_msg.content or "").strip()
_sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
final_response = (_sum_nr.content or "").strip()
else:
summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
@ -8487,14 +8567,14 @@ class AIAgent:
retry_msg, _ = self._normalize_codex_response(retry_response)
final_response = (retry_msg.content or "").strip() if retry_msg else ""
elif self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2
_ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None,
_tretry = self._get_anthropic_transport()
_ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None,
is_oauth=self._is_anthropic_oauth,
max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
preserve_dots=self._anthropic_preserve_dots())
retry_response = self._anthropic_messages_create(_ant_kw2)
_retry_msg, _ = _nar2(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
final_response = (_retry_msg.content or "").strip()
_retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
final_response = (_retry_nr.content or "").strip()
else:
summary_kwargs = {
"model": self.model,
@ -9363,16 +9443,13 @@ class AIAgent:
response_invalid = True
error_details.append("response.output is empty")
elif self.api_mode == "anthropic_messages":
content_blocks = getattr(response, "content", None) if response is not None else None
if response is None:
_tv = self._get_anthropic_transport()
if not _tv.validate_response(response):
response_invalid = True
error_details.append("response is None")
elif not isinstance(content_blocks, list):
response_invalid = True
error_details.append("response.content is not a list")
elif not content_blocks:
response_invalid = True
error_details.append("response.content is empty")
if response is None:
error_details.append("response is None")
else:
error_details.append("response.content invalid (not a non-empty list)")
else:
if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices:
response_invalid = True
@ -9533,8 +9610,8 @@ class AIAgent:
else:
finish_reason = "stop"
elif self.api_mode == "anthropic_messages":
stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"}
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
_tfr = self._get_anthropic_transport()
finish_reason = _tfr.map_finish_reason(response.stop_reason)
else:
finish_reason = response.choices[0].finish_reason
assistant_message = response.choices[0].message
@ -9563,10 +9640,24 @@ class AIAgent:
if self.api_mode in ("chat_completions", "bedrock_converse"):
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
elif self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import normalize_anthropic_response
_trunc_msg, _ = normalize_anthropic_response(
_trunc_nr = self._get_anthropic_transport().normalize_response(
response, strip_tool_prefix=self._is_anthropic_oauth
)
_trunc_msg = SimpleNamespace(
content=_trunc_nr.content,
tool_calls=[
SimpleNamespace(
id=tc.id, type="function",
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
) for tc in (_trunc_nr.tool_calls or [])
] or None,
reasoning=_trunc_nr.reasoning,
reasoning_content=None,
reasoning_details=(
_trunc_nr.provider_data.get("reasoning_details")
if _trunc_nr.provider_data else None
),
)
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
@ -9822,21 +9913,27 @@ class AIAgent:
if self.verbose_logging:
logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")
# Log cache hit stats when prompt caching is active
if self._use_prompt_caching:
if self.api_mode == "anthropic_messages":
# Anthropic uses cache_read_input_tokens / cache_creation_input_tokens
cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0
written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0
else:
# OpenRouter uses prompt_tokens_details.cached_tokens
details = getattr(response.usage, 'prompt_tokens_details', None)
cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
prompt = usage_dict["prompt_tokens"]
# Surface cache hit stats for any provider that reports
# them — not just those where we inject cache_control
# markers. OpenAI/Kimi/DeepSeek/Qwen all do automatic
# server-side prefix caching and return
# ``prompt_tokens_details.cached_tokens``; users
# previously could not see their cache % because this
# line was gated on ``_use_prompt_caching``, which is
# only True for Anthropic-style marker injection.
# ``canonical_usage`` is already normalised from all
# three API shapes (Anthropic / Codex / OpenAI-chat)
# so we can rely on its values directly.
cached = canonical_usage.cache_read_tokens
written = canonical_usage.cache_write_tokens
prompt = usage_dict["prompt_tokens"]
if (cached or written) and not self.quiet_mode:
hit_pct = (cached / prompt * 100) if prompt > 0 else 0
if not self.quiet_mode:
self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
self._vprint(
f"{self.log_prefix} 💾 Cache: "
f"{cached:,}/{prompt:,} tokens "
f"({hit_pct:.0f}% hit, {written:,} written)"
)
has_retried_429 = False # Reset on success
# Clear Nous rate limit state on successful request —
@ -10772,10 +10869,31 @@ class AIAgent:
if self.api_mode == "codex_responses":
assistant_message, finish_reason = self._normalize_codex_response(response)
elif self.api_mode == "anthropic_messages":
from agent.anthropic_adapter import normalize_anthropic_response
assistant_message, finish_reason = normalize_anthropic_response(
_transport = self._get_anthropic_transport()
_nr = _transport.normalize_response(
response, strip_tool_prefix=self._is_anthropic_oauth
)
# Back-compat shim: downstream code expects SimpleNamespace with
# .content, .tool_calls, .reasoning, .reasoning_content,
# .reasoning_details attributes.
assistant_message = SimpleNamespace(
content=_nr.content,
tool_calls=[
SimpleNamespace(
id=tc.id,
type="function",
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
)
for tc in (_nr.tool_calls or [])
] or None,
reasoning=_nr.reasoning,
reasoning_content=None,
reasoning_details=(
_nr.provider_data.get("reasoning_details")
if _nr.provider_data else None
),
)
finish_reason = _nr.finish_reason
else:
assistant_message = response.choices[0].message

View file

@ -56,6 +56,8 @@ AUTHOR_MAP = {
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
"valdi.jorge@gmail.com": "jvcl",
"francip@gmail.com": "francip",
"omni@comelse.com": "omnissiah-comelse",
"oussama.redcode@gmail.com": "mavrickdeveloper",
"126368201+vilkasdev@users.noreply.github.com": "vilkasdev",
"137614867+cutepawss@users.noreply.github.com": "cutepawss",
@ -95,20 +97,24 @@ AUTHOR_MAP = {
"i@troy-y.org": "TroyMitchell911",
"mygamez@163.com": "zhongyueming1121",
"hansnow@users.noreply.github.com": "hansnow",
"134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
# contributors (manual mapping from git names)
"ahmedsherif95@gmail.com": "asheriif",
"liujinkun@bytedance.com": "liujinkun2025",
"dmayhem93@gmail.com": "dmahan93",
"fr@tecompanytea.com": "ifrederico",
"cdanis@gmail.com": "cdanis",
"samherring99@gmail.com": "samherring99",
"desaiaum08@gmail.com": "Aum08Desai",
"shannon.sands.1979@gmail.com": "shannonsands",
"shannon@nousresearch.com": "shannonsands",
"abdi.moya@gmail.com": "AxDSan",
"eri@plasticlabs.ai": "Erosika",
"hjcpuro@gmail.com": "hjc-puro",
"xaydinoktay@gmail.com": "aydnOktay",
"abdullahfarukozden@gmail.com": "Farukest",
"lovre.pesut@gmail.com": "rovle",
"xjtumj@gmail.com": "mengjian-github",
"kevinskysunny@gmail.com": "kevinskysunny",
"xiewenxuan462@gmail.com": "yule975",
"yiweimeng.dlut@hotmail.com": "meng93",
@ -308,6 +314,7 @@ AUTHOR_MAP = {
"anthhub@163.com": "anthhub",
"shenuu@gmail.com": "shenuu",
"xiayh17@gmail.com": "xiayh0107",
"zhujianxyz@gmail.com": "opriz",
"asurla@nvidia.com": "anniesurla",
"limkuan24@gmail.com": "WideLee",
"aviralarora002@gmail.com": "AviArora02-commits",
@ -323,6 +330,8 @@ AUTHOR_MAP = {
"aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80",
"zheng.jerilyn@gmail.com": "jerilynzheng",
"asslaenn5@gmail.com": "Aslaaen",
"shalompmc0505@naver.com": "pinion05",
"105142614+VTRiot@users.noreply.github.com": "VTRiot",
}

View file

@ -372,6 +372,37 @@ async function startSocket() {
const app = express();
app.use(express.json());
// Host-header validation — defends against DNS rebinding.
// The bridge binds loopback-only (127.0.0.1) but a victim browser on
// the same machine could be tricked into fetching from an attacker
// hostname that TTL-flips to 127.0.0.1. Reject any request whose Host
// header doesn't resolve to a loopback alias.
// See GHSA-ppp5-vxwm-4cf7.
const _ACCEPTED_HOST_VALUES = new Set([
'localhost',
'127.0.0.1',
'[::1]',
'::1',
]);
app.use((req, res, next) => {
const raw = (req.headers.host || '').trim();
if (!raw) {
return res.status(400).json({ error: 'Missing Host header' });
}
// Strip port suffix: "localhost:3000" → "localhost"
const hostOnly = (raw.includes(':')
? raw.substring(0, raw.lastIndexOf(':'))
: raw
).replace(/^\[|\]$/g, '').toLowerCase();
if (!_ACCEPTED_HOST_VALUES.has(hostOnly)) {
return res.status(400).json({
error: 'Invalid Host header. Bridge accepts loopback hosts only.',
});
}
next();
});
// Poll for new messages (long-poll style)
app.get('/messages', (req, res) => {
const msgs = messageQueue.splice(0, messageQueue.length);

View file

@ -2,7 +2,7 @@
name: maps
description: >
Location intelligence — geocode a place, reverse-geocode coordinates,
find nearby places (44 POI categories), driving/walking/cycling
find nearby places (46 POI categories), driving/walking/cycling
distance + time, turn-by-turn directions, timezone lookup, bounding
box + area for a named place, and POI search within a rectangle.
Uses OpenStreetMap + Overpass + OSRM. Free, no API key.
@ -83,12 +83,13 @@ python3 $MAPS nearby --near "90210" --category pharmacy
python3 $MAPS nearby --near "downtown austin" --category restaurant --category bar --limit 10
```
44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket,
atm, gas_station, parking, museum, park, school, university, bank, police,
fire_station, library, airport, train_station, bus_stop, church, mosque,
synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office,
convenience_store, bakery, bookshop, laundry, car_wash, car_rental,
bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub.
46 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, guest_house,
camp_site, supermarket, atm, gas_station, parking, museum, park, school,
university, bank, police, fire_station, library, airport, train_station,
bus_stop, church, mosque, synagogue, dentist, doctor, cinema, theatre, gym,
swimming_pool, post_office, convenience_store, bakery, bookshop, laundry,
car_wash, car_rental, bicycle_rental, taxi, veterinary, zoo, playground,
stadium, nightclub.
Each result includes: `name`, `address`, `lat`/`lon`, `distance_m`,
`maps_url` (clickable Google Maps link), `directions_url` (Google Maps

View file

@ -58,7 +58,9 @@ CATEGORY_TAGS = {
"restaurant": ("amenity", "restaurant"),
"cafe": ("amenity", "cafe"),
"bar": ("amenity", "bar"),
"bakery": ("shop", "bakery"),
# bakery is tagged as shop=bakery in the OSM wiki, but some mappers use
# amenity=bakery. Search both so small indie bakeries aren't missed.
"bakery": [("shop", "bakery"), ("amenity", "bakery")],
"convenience_store": ("shop", "convenience"),
# Health
"hospital": ("amenity", "hospital"),
@ -68,6 +70,8 @@ CATEGORY_TAGS = {
"veterinary": ("amenity", "veterinary"),
# Accommodation
"hotel": ("tourism", "hotel"),
"guest_house": ("tourism", "guest_house"),
"camp_site": ("tourism", "camp_site"),
# Shopping & Services
"supermarket": ("shop", "supermarket"),
"bookshop": ("shop", "books"),
@ -120,6 +124,19 @@ RELIGION_FILTER = {
VALID_CATEGORIES = sorted(CATEGORY_TAGS.keys())
def _tags_for(category):
"""Return the CATEGORY_TAGS entry as a list of (key, value) pairs.
Most categories map to a single (tag_key, tag_val) tuple, but some
(e.g. ``bakery``) are tagged under more than one OSM key and are
represented as a list of tuples. Normalise both forms to a list.
"""
entry = CATEGORY_TAGS[category]
if isinstance(entry, list):
return list(entry)
return [entry]
OSRM_PROFILES = {
"driving": "driving",
"walking": "foot",
@ -338,36 +355,63 @@ def geocode_single(query):
# ---------------------------------------------------------------------------
def build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
religion=None):
"""Build an Overpass QL query for nearby POIs around a point."""
religion=None, tag_pairs=None):
"""Build an Overpass QL query for nearby POIs around a point.
If ``tag_pairs`` is provided, the query unions across every
``(key, value)`` pair (used for categories like ``bakery`` that are
tagged under more than one OSM key). Otherwise falls back to the
single ``tag_key``/``tag_val`` pair for back-compat.
"""
pairs = tag_pairs if tag_pairs else [(tag_key, tag_val)]
religion_filter = ""
if religion:
religion_filter = f'["religion"="{religion}"]'
body_lines = []
for k, v in pairs:
body_lines.append(
f' node["{k}"="{v}"]{religion_filter}'
f'(around:{radius},{lat},{lon});'
)
body_lines.append(
f' way["{k}"="{v}"]{religion_filter}'
f'(around:{radius},{lat},{lon});'
)
body = "\n".join(body_lines)
return (
f'[out:json][timeout:25];\n'
f'(\n'
f' node["{tag_key}"="{tag_val}"]{religion_filter}'
f'(around:{radius},{lat},{lon});\n'
f' way["{tag_key}"="{tag_val}"]{religion_filter}'
f'(around:{radius},{lat},{lon});\n'
f'{body}\n'
f');\n'
f'out center {limit};\n'
)
def build_overpass_bbox(tag_key, tag_val, south, west, north, east, limit,
religion=None):
"""Build an Overpass QL query for POIs within a bounding box."""
religion=None, tag_pairs=None):
"""Build an Overpass QL query for POIs within a bounding box.
See ``build_overpass_nearby`` for ``tag_pairs`` semantics.
"""
pairs = tag_pairs if tag_pairs else [(tag_key, tag_val)]
religion_filter = ""
if religion:
religion_filter = f'["religion"="{religion}"]'
body_lines = []
for k, v in pairs:
body_lines.append(
f' node["{k}"="{v}"]{religion_filter}'
f'({south},{west},{north},{east});'
)
body_lines.append(
f' way["{k}"="{v}"]{religion_filter}'
f'({south},{west},{north},{east});'
)
body = "\n".join(body_lines)
return (
f'[out:json][timeout:25];\n'
f'(\n'
f' node["{tag_key}"="{tag_val}"]{religion_filter}'
f'({south},{west},{north},{east});\n'
f' way["{tag_key}"="{tag_val}"]{religion_filter}'
f'({south},{west},{north},{east});\n'
f'{body}\n'
f');\n'
f'out center {limit};\n'
)
@ -605,10 +649,10 @@ def cmd_nearby(args):
# appear twice.
merged = {}
for category in categories:
tag_key, tag_val = CATEGORY_TAGS[category]
tag_pairs = _tags_for(category)
religion = RELIGION_FILTER.get(category)
query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
religion=religion)
query = build_overpass_nearby(None, None, lat, lon, radius, limit,
religion=religion, tag_pairs=tag_pairs)
raw = overpass_query(query)
elements = raw.get("elements", [])
for place in parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon):
@ -945,10 +989,10 @@ def cmd_bbox(args):
if limit <= 0:
error_exit("Limit must be a positive integer.")
tag_key, tag_val = CATEGORY_TAGS[category]
tag_pairs = _tags_for(category)
religion = RELIGION_FILTER.get(category)
query = build_overpass_bbox(tag_key, tag_val, south, west, north, east,
limit, religion=religion)
query = build_overpass_bbox(None, None, south, west, north, east,
limit, religion=religion, tag_pairs=tag_pairs)
raw = overpass_query(query)

View file

@ -1,7 +1,7 @@
---
name: xurl
description: Interact with X/Twitter via xurl, the official X API CLI. Use for posting, replying, quoting, searching, timelines, mentions, likes, reposts, bookmarks, follows, DMs, media upload, and raw v2 endpoint access.
version: 1.1.0
version: 1.1.1
author: xdevplatform + openclaw + Hermes Agent
license: MIT
platforms: [linux, macos]
@ -95,6 +95,12 @@ These steps must be performed by the user directly, NOT by the agent, because th
xurl auth oauth2 --app my-app
```
(This opens a browser for the OAuth 2.0 PKCE flow.)
If X returns a `UsernameNotFound` error or 403 on the post-OAuth `/2/users/me` lookup, pass your handle explicitly (xurl v1.1.0+):
```bash
xurl auth oauth2 --app my-app YOUR_USERNAME
```
This binds the token to your handle and skips the broken `/2/users/me` call.
6. Set the app as default so all commands use it:
```bash
xurl auth default my-app
@ -380,6 +386,7 @@ xurl --app staging /2/users/me # one-off against staging
| --- | --- | --- |
| Auth errors after successful OAuth flow | Token saved to `default` app (no client-id/secret) instead of your named app | `xurl auth oauth2 --app my-app` then `xurl auth default my-app` |
| `unauthorized_client` during OAuth | App type set to "Native App" in X dashboard | Change to "Web app, automated app or bot" in User Authentication Settings |
| `UsernameNotFound` or 403 on `/2/users/me` right after OAuth | X not returning username reliably from `/2/users/me` | Re-run `xurl auth oauth2 --app my-app YOUR_USERNAME` (xurl v1.1.0+) to pass the handle explicitly |
| 401 on every request | Token expired or wrong default app | Check `xurl auth status` — verify `▸` points to an app with oauth2 tokens |
| `client-forbidden` / `client-not-enrolled` | X platform enrollment issue | Dashboard → Apps → Manage → Move to "Pay-per-use" package → Production environment |
| `CreditsDepleted` | $0 balance on X API | Buy credits (min $5) in Developer Console → Billing |

View file

@ -0,0 +1,170 @@
"""Tests for GHSA-96vc-wcxf-jjff and GHSA-qg5c-hvr5-hjgr.
Two related ACP approval-flow issues:
- 96vc: ACP didn't set HERMES_EXEC_ASK, so `check_all_command_guards`
took the non-interactive auto-approve path and never consulted the
ACP-supplied callback.
- qg5c: `_approval_callback` was a module-global in terminal_tool;
overlapping ACP sessions overwrote each other's callback slot.
Both fixed together by:
1. Setting HERMES_EXEC_ASK inside _run_agent (wraps the agent call).
2. Storing the callback in thread-local state so concurrent executor
threads don't collide.
"""
import os
import threading
from unittest.mock import MagicMock
import pytest
class TestThreadLocalApprovalCallback:
"""GHSA-qg5c-hvr5-hjgr: set_approval_callback must be per-thread so
concurrent ACP sessions don't stomp on each other's handlers."""
def test_set_and_get_in_same_thread(self):
from tools.terminal_tool import (
set_approval_callback,
_get_approval_callback,
)
cb1 = lambda cmd, desc: "once" # noqa: E731
set_approval_callback(cb1)
assert _get_approval_callback() is cb1
def test_callback_not_visible_in_different_thread(self):
"""Thread A's callback is NOT visible to Thread B."""
from tools.terminal_tool import (
set_approval_callback,
_get_approval_callback,
)
cb_a = lambda cmd, desc: "thread_a" # noqa: E731
cb_b = lambda cmd, desc: "thread_b" # noqa: E731
seen_in_a = []
seen_in_b = []
def thread_a():
set_approval_callback(cb_a)
# Pause so thread B has time to set its own callback
import time
time.sleep(0.05)
seen_in_a.append(_get_approval_callback())
def thread_b():
set_approval_callback(cb_b)
import time
time.sleep(0.05)
seen_in_b.append(_get_approval_callback())
ta = threading.Thread(target=thread_a)
tb = threading.Thread(target=thread_b)
ta.start()
tb.start()
ta.join()
tb.join()
# Each thread must see ONLY its own callback — not the other's
assert seen_in_a == [cb_a]
assert seen_in_b == [cb_b]
def test_main_thread_callback_not_leaked_to_worker(self):
"""A callback set in the main thread does NOT leak into a
freshly-spawned worker thread."""
from tools.terminal_tool import (
set_approval_callback,
_get_approval_callback,
)
cb_main = lambda cmd, desc: "main" # noqa: E731
set_approval_callback(cb_main)
worker_saw = []
def worker():
worker_saw.append(_get_approval_callback())
t = threading.Thread(target=worker)
t.start()
t.join()
# Worker thread has no callback set — TLS is empty for it
assert worker_saw == [None]
# Main thread still has its callback
assert _get_approval_callback() is cb_main
def test_sudo_password_callback_also_thread_local(self):
"""Same protection applies to the sudo password callback."""
from tools.terminal_tool import (
set_sudo_password_callback,
_get_sudo_password_callback,
)
cb_main = lambda: "main-password" # noqa: E731
set_sudo_password_callback(cb_main)
worker_saw = []
def worker():
worker_saw.append(_get_sudo_password_callback())
t = threading.Thread(target=worker)
t.start()
t.join()
assert worker_saw == [None]
assert _get_sudo_password_callback() is cb_main
class TestAcpExecAskGate:
"""GHSA-96vc-wcxf-jjff: ACP's _run_agent must set HERMES_INTERACTIVE so
that tools.approval.check_all_command_guards takes the CLI-interactive
path (consults the registered callback via prompt_dangerous_approval)
instead of the non-interactive auto-approve shortcut.
(HERMES_EXEC_ASK takes the gateway-queue path which requires a
notify_cb registered in _gateway_notify_cbs not applicable to ACP,
which uses a direct callback shape.)"""
def test_interactive_env_var_routes_to_callback(self, monkeypatch):
"""When HERMES_INTERACTIVE is set and an approval callback is
registered, a dangerous command must route through the callback."""
# Clean env
monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)
from tools.approval import check_all_command_guards
called_with = []
def fake_cb(command, description, *, allow_permanent=True):
called_with.append((command, description))
return "once"
# Without HERMES_INTERACTIVE: takes auto-approve path, callback NOT called
result = check_all_command_guards(
"rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
)
assert result["approved"] is True
assert called_with == [], (
"without HERMES_INTERACTIVE the non-interactive auto-approve "
"path should fire without consulting the callback"
)
# With HERMES_INTERACTIVE: callback IS called, approval flows through it
monkeypatch.setenv("HERMES_INTERACTIVE", "1")
called_with.clear()
result = check_all_command_guards(
"rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
)
assert called_with, (
"with HERMES_INTERACTIVE the approval path should consult the "
"registered callback — this was the ACP bypass in "
"GHSA-96vc-wcxf-jjff"
)
assert result["approved"] is True

View file

@ -73,3 +73,17 @@ class TestApprovalMapping:
result = cb("rm -rf /", "dangerous")
assert result == "deny"
def test_approval_none_response_returns_deny(self):
"""When request_permission resolves to None, the callback should return 'deny'."""
loop = MagicMock(spec=asyncio.AbstractEventLoop)
mock_rp = MagicMock(name="request_permission")
future = MagicMock(spec=Future)
future.result.return_value = None
with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=1.0)
result = cb("echo hi", "demo")
assert result == "deny"

View file

@ -95,19 +95,37 @@ class TestInitialize:
class TestAuthenticate:
@pytest.mark.asyncio
async def test_authenticate_with_provider_configured(self, agent, monkeypatch):
async def test_authenticate_with_matching_method_id(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.has_provider",
lambda: True,
"acp_adapter.server.detect_provider",
lambda: "openrouter",
)
resp = await agent.authenticate(method_id="openrouter")
assert isinstance(resp, AuthenticateResponse)
@pytest.mark.asyncio
async def test_authenticate_is_case_insensitive(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.detect_provider",
lambda: "openrouter",
)
resp = await agent.authenticate(method_id="OpenRouter")
assert isinstance(resp, AuthenticateResponse)
@pytest.mark.asyncio
async def test_authenticate_rejects_mismatched_method_id(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.detect_provider",
lambda: "openrouter",
)
resp = await agent.authenticate(method_id="totally-invalid-method")
assert resp is None
@pytest.mark.asyncio
async def test_authenticate_without_provider(self, agent, monkeypatch):
monkeypatch.setattr(
"acp_adapter.server.has_provider",
lambda: False,
"acp_adapter.server.detect_provider",
lambda: None,
)
resp = await agent.authenticate(method_id="openrouter")
assert resp is None
@ -252,6 +270,57 @@ class TestListAndFork:
mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3")
@pytest.mark.asyncio
async def test_list_sessions_pagination_first_page(self, agent):
from acp_adapter import server as acp_server
infos = [
{"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
for i in range(acp_server._LIST_SESSIONS_PAGE_SIZE + 5)
]
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
resp = await agent.list_sessions()
assert len(resp.sessions) == acp_server._LIST_SESSIONS_PAGE_SIZE
assert resp.next_cursor == resp.sessions[-1].session_id
@pytest.mark.asyncio
async def test_list_sessions_pagination_no_more(self, agent):
infos = [
{"session_id": f"s{i}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
for i in range(3)
]
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
resp = await agent.list_sessions()
assert len(resp.sessions) == 3
assert resp.next_cursor is None
@pytest.mark.asyncio
async def test_list_sessions_cursor_resumes_after_match(self, agent):
infos = [
{"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
{"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
{"session_id": "s3", "cwd": "/tmp", "title": None, "updated_at": 0.0},
]
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
resp = await agent.list_sessions(cursor="s1")
assert [s.session_id for s in resp.sessions] == ["s2", "s3"]
assert resp.next_cursor is None
@pytest.mark.asyncio
async def test_list_sessions_unknown_cursor_returns_empty(self, agent):
infos = [
{"session_id": "s1", "cwd": "/tmp", "title": None, "updated_at": 0.0},
{"session_id": "s2", "cwd": "/tmp", "title": None, "updated_at": 0.0},
]
with patch.object(agent.session_manager, "list_sessions", return_value=infos):
resp = await agent.list_sessions(cursor="does-not-exist")
assert resp.sessions == []
assert resp.next_cursor is None
# ---------------------------------------------------------------------------
# session configuration / model routing
# ---------------------------------------------------------------------------

View file

@ -414,7 +414,11 @@ class TestRunOauthSetupToken:
token = run_oauth_setup_token()
assert token == "from-cred-file"
mock_run.assert_called_once()
# Don't assert exact call count — the contract is "credentials flow
# through", not "exactly one subprocess call". xdist cross-test
# pollution (other tests shimming subprocess via plugins) has flaked
# assert_called_once() in CI.
assert mock_run.called
def test_returns_token_from_env_var(self, monkeypatch, tmp_path):
"""Falls back to CLAUDE_CODE_OAUTH_TOKEN env var when no cred files."""

View file

@ -0,0 +1,238 @@
"""Regression tests: normalize_anthropic_response_v2 vs v1.
Constructs mock Anthropic responses and asserts that the v2 function
(returning NormalizedResponse) produces identical field values to the
original v1 function (returning SimpleNamespace + finish_reason).
"""
import json
import pytest
from types import SimpleNamespace
from agent.anthropic_adapter import (
normalize_anthropic_response,
normalize_anthropic_response_v2,
)
from agent.transports.types import NormalizedResponse, ToolCall
# ---------------------------------------------------------------------------
# Helpers to build mock Anthropic SDK responses
# ---------------------------------------------------------------------------
def _text_block(text: str):
return SimpleNamespace(type="text", text=text)
def _thinking_block(thinking: str, signature: str = "sig_abc"):
return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
def _tool_use_block(id: str, name: str, input: dict):
return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
def _response(content_blocks, stop_reason="end_turn"):
return SimpleNamespace(
content=content_blocks,
stop_reason=stop_reason,
usage=SimpleNamespace(
input_tokens=10,
output_tokens=5,
),
)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestTextOnly:
"""Text-only response — no tools, no thinking."""
def setup_method(self):
self.resp = _response([_text_block("Hello world")])
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
self.v2 = normalize_anthropic_response_v2(self.resp)
def test_type(self):
assert isinstance(self.v2, NormalizedResponse)
def test_content_matches(self):
assert self.v2.content == self.v1_msg.content
def test_finish_reason_matches(self):
assert self.v2.finish_reason == self.v1_finish
def test_no_tool_calls(self):
assert self.v2.tool_calls is None
assert self.v1_msg.tool_calls is None
def test_no_reasoning(self):
assert self.v2.reasoning is None
assert self.v1_msg.reasoning is None
class TestWithToolCalls:
"""Response with tool calls."""
def setup_method(self):
self.resp = _response(
[
_text_block("I'll check that"),
_tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
_tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
],
stop_reason="tool_use",
)
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
self.v2 = normalize_anthropic_response_v2(self.resp)
def test_finish_reason(self):
assert self.v2.finish_reason == "tool_calls"
assert self.v1_finish == "tool_calls"
def test_tool_call_count(self):
assert len(self.v2.tool_calls) == 2
assert len(self.v1_msg.tool_calls) == 2
def test_tool_call_ids_match(self):
for i in range(2):
assert self.v2.tool_calls[i].id == self.v1_msg.tool_calls[i].id
def test_tool_call_names_match(self):
assert self.v2.tool_calls[0].name == "terminal"
assert self.v2.tool_calls[1].name == "read_file"
for i in range(2):
assert self.v2.tool_calls[i].name == self.v1_msg.tool_calls[i].function.name
def test_tool_call_arguments_match(self):
for i in range(2):
assert self.v2.tool_calls[i].arguments == self.v1_msg.tool_calls[i].function.arguments
def test_content_preserved(self):
assert self.v2.content == self.v1_msg.content
assert "check that" in self.v2.content
class TestWithThinking:
"""Response with thinking blocks (Claude 3.5+ extended thinking)."""
def setup_method(self):
self.resp = _response([
_thinking_block("Let me think about this carefully..."),
_text_block("The answer is 42."),
])
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
self.v2 = normalize_anthropic_response_v2(self.resp)
def test_reasoning_matches(self):
assert self.v2.reasoning == self.v1_msg.reasoning
assert "think about this" in self.v2.reasoning
def test_reasoning_details_in_provider_data(self):
v1_details = self.v1_msg.reasoning_details
v2_details = self.v2.provider_data.get("reasoning_details") if self.v2.provider_data else None
assert v1_details is not None
assert v2_details is not None
assert len(v2_details) == len(v1_details)
def test_content_excludes_thinking(self):
assert self.v2.content == "The answer is 42."
class TestMixed:
"""Response with thinking + text + tool calls."""
def setup_method(self):
self.resp = _response(
[
_thinking_block("Planning my approach..."),
_text_block("I'll run the command"),
_tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
],
stop_reason="tool_use",
)
self.v1_msg, self.v1_finish = normalize_anthropic_response(self.resp)
self.v2 = normalize_anthropic_response_v2(self.resp)
def test_all_fields_present(self):
assert self.v2.content is not None
assert self.v2.tool_calls is not None
assert self.v2.reasoning is not None
assert self.v2.finish_reason == "tool_calls"
def test_content_matches(self):
assert self.v2.content == self.v1_msg.content
def test_reasoning_matches(self):
assert self.v2.reasoning == self.v1_msg.reasoning
def test_tool_call_matches(self):
assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id
assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name
class TestStopReasons:
"""Verify finish_reason mapping matches between v1 and v2."""
@pytest.mark.parametrize("stop_reason,expected", [
("end_turn", "stop"),
("tool_use", "tool_calls"),
("max_tokens", "length"),
("stop_sequence", "stop"),
("refusal", "content_filter"),
("model_context_window_exceeded", "length"),
("unknown_future_reason", "stop"),
])
def test_stop_reason_mapping(self, stop_reason, expected):
resp = _response([_text_block("x")], stop_reason=stop_reason)
v1_msg, v1_finish = normalize_anthropic_response(resp)
v2 = normalize_anthropic_response_v2(resp)
assert v2.finish_reason == v1_finish == expected
class TestStripToolPrefix:
"""Verify mcp_ prefix stripping works identically."""
def test_prefix_stripped(self):
resp = _response(
[_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
stop_reason="tool_use",
)
v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=True)
v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=True)
assert v1_msg.tool_calls[0].function.name == "terminal"
assert v2.tool_calls[0].name == "terminal"
def test_prefix_kept(self):
resp = _response(
[_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
stop_reason="tool_use",
)
v1_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=False)
v2 = normalize_anthropic_response_v2(resp, strip_tool_prefix=False)
assert v1_msg.tool_calls[0].function.name == "mcp_terminal"
assert v2.tool_calls[0].name == "mcp_terminal"
class TestEdgeCases:
"""Edge cases: empty content, no blocks, etc."""
def test_empty_content_blocks(self):
resp = _response([])
v1_msg, v1_finish = normalize_anthropic_response(resp)
v2 = normalize_anthropic_response_v2(resp)
assert v2.content == v1_msg.content
assert v2.content is None
def test_no_reasoning_details_means_none_provider_data(self):
resp = _response([_text_block("hi")])
v2 = normalize_anthropic_response_v2(resp)
assert v2.provider_data is None
def test_v2_returns_dataclass_not_namespace(self):
resp = _response([_text_block("hi")])
v2 = normalize_anthropic_response_v2(resp)
assert isinstance(v2, NormalizedResponse)
assert not isinstance(v2, SimpleNamespace)

View file

@ -0,0 +1,146 @@
"""Focused regressions for the Copilot ACP shim safety layer."""
from __future__ import annotations
import io
import json
import os
import tempfile
import unittest
from pathlib import Path
from unittest.mock import patch
from agent.copilot_acp_client import CopilotACPClient
class _FakeProcess:
def __init__(self) -> None:
self.stdin = io.StringIO()
class CopilotACPClientSafetyTests(unittest.TestCase):
def setUp(self) -> None:
self.client = CopilotACPClient(acp_cwd="/tmp")
def _dispatch(self, message: dict, *, cwd: str) -> dict:
process = _FakeProcess()
handled = self.client._handle_server_message(
message,
process=process,
cwd=cwd,
text_parts=[],
reasoning_parts=[],
)
self.assertTrue(handled)
payload = process.stdin.getvalue().strip()
self.assertTrue(payload)
return json.loads(payload)
def test_request_permission_is_not_auto_allowed(self) -> None:
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 1,
"method": "session/request_permission",
"params": {},
},
cwd="/tmp",
)
outcome = (((response.get("result") or {}).get("outcome") or {}).get("outcome"))
self.assertEqual(outcome, "cancelled")
def test_read_text_file_blocks_internal_hermes_hub_files(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
home = Path(tmpdir) / "home"
blocked = home / ".hermes" / "skills" / ".hub" / "index-cache" / "entry.json"
blocked.parent.mkdir(parents=True, exist_ok=True)
blocked.write_text('{"token":"sk-test-secret-1234567890"}')
with patch.dict(
os.environ,
{"HOME": str(home), "HERMES_HOME": str(home / ".hermes")},
clear=False,
):
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 2,
"method": "fs/read_text_file",
"params": {"path": str(blocked)},
},
cwd=str(home),
)
self.assertIn("error", response)
def test_read_text_file_redacts_sensitive_content(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
root = Path(tmpdir)
secret_file = root / "config.env"
secret_file.write_text("OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012")
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 3,
"method": "fs/read_text_file",
"params": {"path": str(secret_file)},
},
cwd=str(root),
)
content = ((response.get("result") or {}).get("content") or "")
self.assertNotIn("abc123def456", content)
self.assertIn("OPENAI_API_KEY=", content)
def test_write_text_file_reuses_write_denylist(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
home = Path(tmpdir) / "home"
target = home / ".ssh" / "id_rsa"
target.parent.mkdir(parents=True, exist_ok=True)
with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True):
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 4,
"method": "fs/write_text_file",
"params": {
"path": str(target),
"content": "fake-private-key",
},
},
cwd=str(home),
)
self.assertIn("error", response)
self.assertFalse(target.exists())
def test_write_text_file_respects_safe_root(self) -> None:
with tempfile.TemporaryDirectory() as tmpdir:
root = Path(tmpdir)
safe_root = root / "workspace"
safe_root.mkdir()
outside = root / "outside.txt"
with patch.dict(os.environ, {"HERMES_WRITE_SAFE_ROOT": str(safe_root)}, clear=False):
response = self._dispatch(
{
"jsonrpc": "2.0",
"id": 5,
"method": "fs/write_text_file",
"params": {
"path": str(outside),
"content": "should-not-write",
},
},
cwd=str(root),
)
self.assertIn("error", response)
self.assertFalse(outside.exists())
if __name__ == "__main__":
unittest.main()

View file

@ -516,13 +516,12 @@ class TestGatewayFormatting:
assert "**" in text # Markdown bold
def test_gateway_format_hides_cost(self, populated_db):
"""Gateway format omits dollar figures and internal cache details."""
engine = InsightsEngine(populated_db)
report = engine.generate(days=30)
text = engine.format_gateway(report)
assert "$" in text
assert "Top Skills" in text
assert "Est. cost" in text
assert "$" not in text
assert "cache" not in text.lower()
def test_gateway_format_shows_models(self, populated_db):

View file

@ -84,38 +84,6 @@ class TestMinimaxAuxModel:
assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]
class TestMinimaxModelCatalog:
"""Verify the model catalog matches official Anthropic-compat endpoint models.
Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
"""
def test_catalog_includes_current_models(self):
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M2.7" in models
assert "MiniMax-M2.5" in models
assert "MiniMax-M2.1" in models
assert "MiniMax-M2" in models
def test_catalog_excludes_m1_family(self):
"""M1 models are not available on the /anthropic endpoint."""
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M1" not in models
def test_catalog_excludes_highspeed(self):
"""Highspeed variants are available but not shown in default catalog
(users can still specify them manually)."""
from hermes_cli.models import _PROVIDER_MODELS
for provider in ("minimax", "minimax-cn"):
models = _PROVIDER_MODELS[provider]
assert "MiniMax-M2.7-highspeed" not in models
assert "MiniMax-M2.5-highspeed" not in models
class TestMinimaxBetaHeaders:
"""MiniMax Anthropic-compat endpoints reject fine-grained-tool-streaming beta.

View file

@ -6,6 +6,8 @@ when proxy env vars or custom endpoint URLs are malformed.
"""
from __future__ import annotations
import os
import pytest
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
@ -31,6 +33,12 @@ def test_proxy_env_accepts_empty(monkeypatch):
_validate_proxy_env_urls() # should not raise
def test_proxy_env_normalizes_socks_alias(monkeypatch):
monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
_validate_proxy_env_urls()
assert os.environ["ALL_PROXY"] == "socks5://127.0.0.1:1080/"
@pytest.mark.parametrize("key", [
"HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
"http_proxy", "https_proxy", "all_proxy",

View file

@ -405,3 +405,191 @@ class TestPlanSkillHelpers:
assert "Add a /plan command" in msg
assert ".hermes/plans/plan.md" in msg
assert "Runtime note:" in msg
class TestSkillDirectoryHeader:
"""The activation message must expose the absolute skill directory and
explain how to resolve relative paths, so skills with bundled scripts
don't force the agent into a second ``skill_view()`` round-trip."""
def test_header_contains_absolute_skill_dir(self, tmp_path):
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
skill_dir = _make_skill(tmp_path, "abs-dir-skill")
scan_skill_commands()
msg = build_skill_invocation_message("/abs-dir-skill", "go")
assert msg is not None
assert f"[Skill directory: {skill_dir}]" in msg
assert "Resolve any relative paths" in msg
def test_supporting_files_shown_with_absolute_paths(self, tmp_path):
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
skill_dir = _make_skill(tmp_path, "scripted-skill")
(skill_dir / "scripts").mkdir()
(skill_dir / "scripts" / "run.js").write_text("console.log('hi')")
scan_skill_commands()
msg = build_skill_invocation_message("/scripted-skill")
assert msg is not None
# The supporting-files block must emit both the relative form (so the
# agent can call skill_view on it) and the absolute form (so it can
# run the script directly via terminal).
assert "scripts/run.js" in msg
assert str(skill_dir / "scripts" / "run.js") in msg
assert f"node {skill_dir}/scripts/foo.js" in msg
class TestTemplateVarSubstitution:
"""``${HERMES_SKILL_DIR}`` and ``${HERMES_SESSION_ID}`` in SKILL.md body
are replaced before the agent sees the content."""
def test_substitutes_skill_dir(self, tmp_path):
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
skill_dir = _make_skill(
tmp_path,
"templated",
body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
)
scan_skill_commands()
msg = build_skill_invocation_message("/templated")
assert msg is not None
assert f"node {skill_dir}/scripts/foo.js" in msg
# The literal template token must not leak through.
assert "${HERMES_SKILL_DIR}" not in msg.split("[Skill directory:")[0]
def test_substitutes_session_id_when_available(self, tmp_path):
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_skill(
tmp_path,
"sess-templated",
body="Session: ${HERMES_SESSION_ID}",
)
scan_skill_commands()
msg = build_skill_invocation_message(
"/sess-templated", task_id="abc-123"
)
assert msg is not None
assert "Session: abc-123" in msg
def test_leaves_session_id_token_when_missing(self, tmp_path):
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_skill(
tmp_path,
"sess-missing",
body="Session: ${HERMES_SESSION_ID}",
)
scan_skill_commands()
msg = build_skill_invocation_message("/sess-missing", task_id=None)
assert msg is not None
# No session — token left intact so the author can spot it.
assert "Session: ${HERMES_SESSION_ID}" in msg
def test_disable_template_vars_via_config(self, tmp_path):
with (
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
patch(
"agent.skill_commands._load_skills_config",
return_value={"template_vars": False},
),
):
_make_skill(
tmp_path,
"no-sub",
body="Run: node ${HERMES_SKILL_DIR}/scripts/foo.js",
)
scan_skill_commands()
msg = build_skill_invocation_message("/no-sub")
assert msg is not None
# Template token must survive when substitution is disabled.
assert "${HERMES_SKILL_DIR}/scripts/foo.js" in msg
class TestInlineShellExpansion:
"""Inline ``!`cmd`` snippets in SKILL.md run before the agent sees the
content but only when the user has opted in via config."""
def test_inline_shell_is_off_by_default(self, tmp_path):
with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
_make_skill(
tmp_path,
"dyn-default-off",
body="Today is !`echo INLINE_RAN`.",
)
scan_skill_commands()
msg = build_skill_invocation_message("/dyn-default-off")
assert msg is not None
# Default config has inline_shell=False — snippet must stay literal.
assert "!`echo INLINE_RAN`" in msg
assert "Today is INLINE_RAN." not in msg
def test_inline_shell_runs_when_enabled(self, tmp_path):
with (
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
patch(
"agent.skill_commands._load_skills_config",
return_value={"template_vars": True, "inline_shell": True,
"inline_shell_timeout": 5},
),
):
_make_skill(
tmp_path,
"dyn-on",
body="Marker: !`echo INLINE_RAN`.",
)
scan_skill_commands()
msg = build_skill_invocation_message("/dyn-on")
assert msg is not None
assert "Marker: INLINE_RAN." in msg
assert "!`echo INLINE_RAN`" not in msg
def test_inline_shell_runs_in_skill_directory(self, tmp_path):
"""Inline snippets get the skill dir as CWD so relative paths work."""
with (
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
patch(
"agent.skill_commands._load_skills_config",
return_value={"template_vars": True, "inline_shell": True,
"inline_shell_timeout": 5},
),
):
skill_dir = _make_skill(
tmp_path,
"dyn-cwd",
body="Here: !`pwd`",
)
scan_skill_commands()
msg = build_skill_invocation_message("/dyn-cwd")
assert msg is not None
assert f"Here: {skill_dir}" in msg
def test_inline_shell_timeout_does_not_break_message(self, tmp_path):
with (
patch("tools.skills_tool.SKILLS_DIR", tmp_path),
patch(
"agent.skill_commands._load_skills_config",
return_value={"template_vars": True, "inline_shell": True,
"inline_shell_timeout": 1},
),
):
_make_skill(
tmp_path,
"dyn-slow",
body="Slow: !`sleep 5 && printf DYN_MARKER`",
)
scan_skill_commands()
msg = build_skill_invocation_message("/dyn-slow")
assert msg is not None
# Timeout is surfaced as a marker instead of propagating as an error,
# and the rest of the skill message still renders.
assert "inline-shell timeout" in msg
# The command's intended stdout never made it through — only the
# timeout marker (which echoes the command text) survives.
assert "DYN_MARKER" not in msg.replace("sleep 5 && printf DYN_MARKER", "")

View file

View file

@ -0,0 +1,220 @@
"""Tests for the transport ABC, registry, and AnthropicTransport."""
import pytest
from types import SimpleNamespace
from unittest.mock import MagicMock
from agent.transports.base import ProviderTransport
from agent.transports.types import NormalizedResponse, ToolCall, Usage
from agent.transports import get_transport, register_transport, _REGISTRY
# ── ABC contract tests ──────────────────────────────────────────────────
class TestProviderTransportABC:
"""Verify the ABC contract is enforceable."""
def test_cannot_instantiate_abc(self):
with pytest.raises(TypeError):
ProviderTransport()
def test_concrete_must_implement_all_abstract(self):
class Incomplete(ProviderTransport):
@property
def api_mode(self):
return "test"
with pytest.raises(TypeError):
Incomplete()
def test_minimal_concrete(self):
class Minimal(ProviderTransport):
@property
def api_mode(self):
return "test_minimal"
def convert_messages(self, messages, **kw):
return messages
def convert_tools(self, tools):
return tools
def build_kwargs(self, model, messages, tools=None, **params):
return {"model": model, "messages": messages}
def normalize_response(self, response, **kw):
return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop")
t = Minimal()
assert t.api_mode == "test_minimal"
assert t.validate_response(None) is True # default
assert t.extract_cache_stats(None) is None # default
assert t.map_finish_reason("end_turn") == "end_turn" # default passthrough
# ── Registry tests ───────────────────────────────────────────────────────
class TestTransportRegistry:
def test_get_unregistered_returns_none(self):
assert get_transport("nonexistent_mode") is None
def test_anthropic_registered_on_import(self):
import agent.transports.anthropic # noqa: F401
t = get_transport("anthropic_messages")
assert t is not None
assert t.api_mode == "anthropic_messages"
def test_register_and_get(self):
class DummyTransport(ProviderTransport):
@property
def api_mode(self):
return "dummy_test"
def convert_messages(self, messages, **kw):
return messages
def convert_tools(self, tools):
return tools
def build_kwargs(self, model, messages, tools=None, **params):
return {}
def normalize_response(self, response, **kw):
return NormalizedResponse(content=None, tool_calls=None, finish_reason="stop")
register_transport("dummy_test", DummyTransport)
t = get_transport("dummy_test")
assert t.api_mode == "dummy_test"
# Cleanup
_REGISTRY.pop("dummy_test", None)
# ── AnthropicTransport tests ────────────────────────────────────────────
class TestAnthropicTransport:
@pytest.fixture
def transport(self):
import agent.transports.anthropic # noqa: F401
return get_transport("anthropic_messages")
def test_api_mode(self, transport):
assert transport.api_mode == "anthropic_messages"
def test_convert_tools_simple(self, transport):
tools = [{
"type": "function",
"function": {
"name": "test_tool",
"description": "A test",
"parameters": {"type": "object", "properties": {}},
}
}]
result = transport.convert_tools(tools)
assert len(result) == 1
assert result[0]["name"] == "test_tool"
assert "input_schema" in result[0]
def test_validate_response_none(self, transport):
assert transport.validate_response(None) is False
def test_validate_response_empty_content(self, transport):
r = SimpleNamespace(content=[])
assert transport.validate_response(r) is False
def test_validate_response_valid(self, transport):
r = SimpleNamespace(content=[SimpleNamespace(type="text", text="hello")])
assert transport.validate_response(r) is True
def test_map_finish_reason(self, transport):
assert transport.map_finish_reason("end_turn") == "stop"
assert transport.map_finish_reason("tool_use") == "tool_calls"
assert transport.map_finish_reason("max_tokens") == "length"
assert transport.map_finish_reason("stop_sequence") == "stop"
assert transport.map_finish_reason("refusal") == "content_filter"
assert transport.map_finish_reason("model_context_window_exceeded") == "length"
assert transport.map_finish_reason("unknown") == "stop"
def test_extract_cache_stats_none_usage(self, transport):
r = SimpleNamespace(usage=None)
assert transport.extract_cache_stats(r) is None
def test_extract_cache_stats_with_cache(self, transport):
usage = SimpleNamespace(cache_read_input_tokens=100, cache_creation_input_tokens=50)
r = SimpleNamespace(usage=usage)
result = transport.extract_cache_stats(r)
assert result == {"cached_tokens": 100, "creation_tokens": 50}
def test_extract_cache_stats_zero(self, transport):
usage = SimpleNamespace(cache_read_input_tokens=0, cache_creation_input_tokens=0)
r = SimpleNamespace(usage=usage)
assert transport.extract_cache_stats(r) is None
def test_normalize_response_text(self, transport):
"""Test normalization of a simple text response."""
r = SimpleNamespace(
content=[SimpleNamespace(type="text", text="Hello world")],
stop_reason="end_turn",
usage=SimpleNamespace(input_tokens=10, output_tokens=5),
model="claude-sonnet-4-6",
)
nr = transport.normalize_response(r)
assert isinstance(nr, NormalizedResponse)
assert nr.content == "Hello world"
assert nr.tool_calls is None or nr.tool_calls == []
assert nr.finish_reason == "stop"
def test_normalize_response_tool_calls(self, transport):
"""Test normalization of a tool-use response."""
r = SimpleNamespace(
content=[
SimpleNamespace(
type="tool_use",
id="toolu_123",
name="terminal",
input={"command": "ls"},
),
],
stop_reason="tool_use",
usage=SimpleNamespace(input_tokens=10, output_tokens=20),
model="claude-sonnet-4-6",
)
nr = transport.normalize_response(r)
assert nr.finish_reason == "tool_calls"
assert len(nr.tool_calls) == 1
tc = nr.tool_calls[0]
assert tc.name == "terminal"
assert tc.id == "toolu_123"
assert '"command"' in tc.arguments
def test_normalize_response_thinking(self, transport):
"""Test normalization preserves thinking content."""
r = SimpleNamespace(
content=[
SimpleNamespace(type="thinking", thinking="Let me think..."),
SimpleNamespace(type="text", text="The answer is 42"),
],
stop_reason="end_turn",
usage=SimpleNamespace(input_tokens=10, output_tokens=15),
model="claude-sonnet-4-6",
)
nr = transport.normalize_response(r)
assert nr.content == "The answer is 42"
assert nr.reasoning == "Let me think..."
def test_build_kwargs_returns_dict(self, transport):
"""Test build_kwargs produces a usable kwargs dict."""
messages = [{"role": "user", "content": "Hello"}]
kw = transport.build_kwargs(
model="claude-sonnet-4-6",
messages=messages,
max_tokens=1024,
)
assert isinstance(kw, dict)
assert "model" in kw
assert "max_tokens" in kw
assert "messages" in kw
def test_convert_messages_extracts_system(self, transport):
"""Test convert_messages separates system from messages."""
messages = [
{"role": "system", "content": "You are helpful."},
{"role": "user", "content": "Hi"},
]
system, msgs = transport.convert_messages(messages)
# System should be extracted
assert system is not None
# Messages should only have user
assert len(msgs) >= 1

View file

@ -0,0 +1,151 @@
"""Tests for agent/transports/types.py — dataclass construction + helpers."""
import json
import pytest
from agent.transports.types import (
NormalizedResponse,
ToolCall,
Usage,
build_tool_call,
map_finish_reason,
)
# ---------------------------------------------------------------------------
# ToolCall
# ---------------------------------------------------------------------------
class TestToolCall:
def test_basic_construction(self):
tc = ToolCall(id="call_abc", name="terminal", arguments='{"cmd": "ls"}')
assert tc.id == "call_abc"
assert tc.name == "terminal"
assert tc.arguments == '{"cmd": "ls"}'
assert tc.provider_data is None
def test_none_id(self):
tc = ToolCall(id=None, name="read_file", arguments="{}")
assert tc.id is None
def test_provider_data(self):
tc = ToolCall(
id="call_x",
name="t",
arguments="{}",
provider_data={"call_id": "call_x", "response_item_id": "fc_x"},
)
assert tc.provider_data["call_id"] == "call_x"
assert tc.provider_data["response_item_id"] == "fc_x"
# ---------------------------------------------------------------------------
# Usage
# ---------------------------------------------------------------------------
class TestUsage:
def test_defaults(self):
u = Usage()
assert u.prompt_tokens == 0
assert u.completion_tokens == 0
assert u.total_tokens == 0
assert u.cached_tokens == 0
def test_explicit(self):
u = Usage(prompt_tokens=100, completion_tokens=50, total_tokens=150, cached_tokens=80)
assert u.total_tokens == 150
# ---------------------------------------------------------------------------
# NormalizedResponse
# ---------------------------------------------------------------------------
class TestNormalizedResponse:
def test_text_only(self):
r = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop")
assert r.content == "hello"
assert r.tool_calls is None
assert r.finish_reason == "stop"
assert r.reasoning is None
assert r.usage is None
assert r.provider_data is None
def test_with_tool_calls(self):
tcs = [ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')]
r = NormalizedResponse(content=None, tool_calls=tcs, finish_reason="tool_calls")
assert r.finish_reason == "tool_calls"
assert len(r.tool_calls) == 1
assert r.tool_calls[0].name == "terminal"
def test_with_reasoning(self):
r = NormalizedResponse(
content="answer",
tool_calls=None,
finish_reason="stop",
reasoning="I thought about it",
)
assert r.reasoning == "I thought about it"
def test_with_provider_data(self):
r = NormalizedResponse(
content=None,
tool_calls=None,
finish_reason="stop",
provider_data={"reasoning_details": [{"type": "thinking", "thinking": "hmm"}]},
)
assert r.provider_data["reasoning_details"][0]["type"] == "thinking"
# ---------------------------------------------------------------------------
# build_tool_call
# ---------------------------------------------------------------------------
class TestBuildToolCall:
def test_dict_arguments_serialized(self):
tc = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"})
assert tc.arguments == json.dumps({"cmd": "ls"})
assert tc.provider_data is None
def test_string_arguments_passthrough(self):
tc = build_tool_call(id="call_2", name="read_file", arguments='{"path": "/tmp"}')
assert tc.arguments == '{"path": "/tmp"}'
def test_provider_fields(self):
tc = build_tool_call(
id="call_3",
name="terminal",
arguments="{}",
call_id="call_3",
response_item_id="fc_3",
)
assert tc.provider_data == {"call_id": "call_3", "response_item_id": "fc_3"}
def test_none_id(self):
tc = build_tool_call(id=None, name="t", arguments="{}")
assert tc.id is None
# ---------------------------------------------------------------------------
# map_finish_reason
# ---------------------------------------------------------------------------
class TestMapFinishReason:
ANTHROPIC_MAP = {
"end_turn": "stop",
"tool_use": "tool_calls",
"max_tokens": "length",
"stop_sequence": "stop",
"refusal": "content_filter",
}
def test_known_reason(self):
assert map_finish_reason("end_turn", self.ANTHROPIC_MAP) == "stop"
assert map_finish_reason("tool_use", self.ANTHROPIC_MAP) == "tool_calls"
assert map_finish_reason("max_tokens", self.ANTHROPIC_MAP) == "length"
assert map_finish_reason("refusal", self.ANTHROPIC_MAP) == "content_filter"
def test_unknown_reason_defaults_to_stop(self):
assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop"
def test_none_reason(self):
assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"

View file

@ -0,0 +1,146 @@
"""Regression tests for classic-CLI mid-run /steer dispatch.
Background
----------
/steer sent while the agent is running used to be queued through
``self._pending_input`` alongside ordinary user input. ``process_loop``
pulls from that queue and calls ``process_command()`` but while the
agent is running, ``process_loop`` is blocked inside ``self.chat()``.
By the time the queued /steer was pulled, ``_agent_running`` had
already flipped back to False, so ``process_command()`` took the idle
fallback (``"No agent running; queued as next turn"``) and delivered
the steer as an ordinary next-turn message.
The fix dispatches /steer inline on the UI thread when the agent is
running matching the existing pattern for /model so the steer
reaches ``agent.steer()`` (thread-safe) without touching the queue.
These tests exercise the detector + inline dispatch without starting a
prompt_toolkit app.
"""
from __future__ import annotations
import importlib
import sys
from unittest.mock import MagicMock, patch
def _make_cli():
"""Create a HermesCLI instance with prompt_toolkit stubbed out."""
_clean_config = {
"model": {
"default": "anthropic/claude-opus-4.6",
"base_url": "https://openrouter.ai/api/v1",
"provider": "auto",
},
"display": {"compact": False, "tool_progress": "all"},
"agent": {},
"terminal": {"env_type": "local"},
}
clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
prompt_toolkit_stubs = {
"prompt_toolkit": MagicMock(),
"prompt_toolkit.history": MagicMock(),
"prompt_toolkit.styles": MagicMock(),
"prompt_toolkit.patch_stdout": MagicMock(),
"prompt_toolkit.application": MagicMock(),
"prompt_toolkit.layout": MagicMock(),
"prompt_toolkit.layout.processors": MagicMock(),
"prompt_toolkit.filters": MagicMock(),
"prompt_toolkit.layout.dimension": MagicMock(),
"prompt_toolkit.layout.menus": MagicMock(),
"prompt_toolkit.widgets": MagicMock(),
"prompt_toolkit.key_binding": MagicMock(),
"prompt_toolkit.completion": MagicMock(),
"prompt_toolkit.formatted_text": MagicMock(),
"prompt_toolkit.auto_suggest": MagicMock(),
}
with patch.dict(sys.modules, prompt_toolkit_stubs), patch.dict(
"os.environ", clean_env, clear=False
):
import cli as _cli_mod
_cli_mod = importlib.reload(_cli_mod)
with patch.object(_cli_mod, "get_tool_definitions", return_value=[]), patch.dict(
_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}
):
return _cli_mod.HermesCLI()
class TestSteerInlineDetector:
"""_should_handle_steer_command_inline gates the busy-path fast dispatch."""
def test_detects_steer_when_agent_running(self):
cli = _make_cli()
cli._agent_running = True
assert cli._should_handle_steer_command_inline("/steer focus on error handling") is True
def test_ignores_steer_when_agent_idle(self):
"""Idle-path /steer should fall through to the normal process_loop
dispatch so the queue-style fallback message is emitted."""
cli = _make_cli()
cli._agent_running = False
assert cli._should_handle_steer_command_inline("/steer do something") is False
def test_ignores_non_slash_input(self):
cli = _make_cli()
cli._agent_running = True
assert cli._should_handle_steer_command_inline("steer without slash") is False
assert cli._should_handle_steer_command_inline("") is False
def test_ignores_other_slash_commands(self):
cli = _make_cli()
cli._agent_running = True
assert cli._should_handle_steer_command_inline("/queue hello") is False
assert cli._should_handle_steer_command_inline("/stop") is False
assert cli._should_handle_steer_command_inline("/help") is False
def test_ignores_steer_with_attached_images(self):
"""Image payloads take the normal path; steer doesn't accept images."""
cli = _make_cli()
cli._agent_running = True
assert cli._should_handle_steer_command_inline("/steer text", has_images=True) is False
class TestSteerBusyPathDispatch:
"""When the detector fires, process_command('/steer ...') must call
agent.steer() directly rather than the idle-path fallback."""
def test_process_command_routes_to_agent_steer(self):
"""With _agent_running=True and agent.steer present, /steer reaches
agent.steer(payload), NOT _pending_input."""
cli = _make_cli()
cli._agent_running = True
cli.agent = MagicMock()
cli.agent.steer = MagicMock(return_value=True)
# Make sure the idle-path fallback would be observable if taken
cli._pending_input = MagicMock()
cli.process_command("/steer focus on errors")
cli.agent.steer.assert_called_once_with("focus on errors")
cli._pending_input.put.assert_not_called()
def test_idle_path_queues_as_next_turn(self):
"""Control — when the agent is NOT running, /steer correctly falls
back to next-turn queue semantics. Demonstrates why the fix was
needed: the queue path only works when you can actually drain it."""
cli = _make_cli()
cli._agent_running = False
cli.agent = MagicMock()
cli.agent.steer = MagicMock(return_value=True)
cli._pending_input = MagicMock()
cli.process_command("/steer would-be-next-turn")
# Idle path does NOT call agent.steer
cli.agent.steer.assert_not_called()
# It puts the payload in the queue as a normal next-turn message
cli._pending_input.put.assert_called_once_with("would-be-next-turn")
if __name__ == "__main__": # pragma: no cover
import pytest
pytest.main([__file__, "-v"])

View file

@ -186,6 +186,31 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
"HERMES_HOME_MODE",
"BROWSER_CDP_URL",
"CAMOFOX_URL",
# Platform allowlists — not credentials, but if set from any source
# (user shell, earlier leaky test, CI env), they change gateway auth
# behavior and flake button-authorization tests.
"TELEGRAM_ALLOWED_USERS",
"DISCORD_ALLOWED_USERS",
"WHATSAPP_ALLOWED_USERS",
"SLACK_ALLOWED_USERS",
"SIGNAL_ALLOWED_USERS",
"SIGNAL_GROUP_ALLOWED_USERS",
"EMAIL_ALLOWED_USERS",
"SMS_ALLOWED_USERS",
"MATTERMOST_ALLOWED_USERS",
"MATRIX_ALLOWED_USERS",
"DINGTALK_ALLOWED_USERS",
"FEISHU_ALLOWED_USERS",
"WECOM_ALLOWED_USERS",
"GATEWAY_ALLOWED_USERS",
"GATEWAY_ALLOW_ALL_USERS",
"TELEGRAM_ALLOW_ALL_USERS",
"DISCORD_ALLOW_ALL_USERS",
"WHATSAPP_ALLOW_ALL_USERS",
"SLACK_ALLOW_ALL_USERS",
"SIGNAL_ALLOW_ALL_USERS",
"EMAIL_ALLOW_ALL_USERS",
"SMS_ALLOW_ALL_USERS",
})
@ -258,6 +283,107 @@ def _isolate_hermes_home(_hermetic_environment):
return None
# ── Module-level state reset ───────────────────────────────────────────────
#
# Python modules are singletons per process, and pytest-xdist workers are
# long-lived. Module-level dicts/sets (tool registries, approval state,
# interrupt flags) and ContextVars persist across tests in the same worker,
# causing tests that pass alone to fail when run with siblings.
#
# Each entry in this fixture clears state that belongs to a specific module.
# New state buckets go here too — this is the single gate that prevents
# "works alone, flakes in CI" bugs from state leakage.
#
# The skill `test-suite-cascade-diagnosis` documents the concrete patterns
# this closes; the running example was `test_command_guards` failing 12/15
# CI runs because ``tools.approval._session_approved`` carried approvals
# from one test's session into another's.
@pytest.fixture(autouse=True)
def _reset_module_state():
"""Clear module-level mutable state and ContextVars between tests.
Keeps state from leaking across tests on the same xdist worker. Modules
that don't exist yet (test collection before production import) are
skipped silently production import later creates fresh empty state.
"""
# --- tools.approval — the single biggest source of cross-test pollution ---
try:
from tools import approval as _approval_mod
_approval_mod._session_approved.clear()
_approval_mod._session_yolo.clear()
_approval_mod._permanent_approved.clear()
_approval_mod._pending.clear()
_approval_mod._gateway_queues.clear()
_approval_mod._gateway_notify_cbs.clear()
# ContextVar: reset to empty string so get_current_session_key()
# falls through to the env var / default path, matching a fresh
# process.
_approval_mod._approval_session_key.set("")
except Exception:
pass
# --- tools.interrupt — per-thread interrupt flag set ---
try:
from tools import interrupt as _interrupt_mod
with _interrupt_mod._lock:
_interrupt_mod._interrupted_threads.clear()
except Exception:
pass
# --- gateway.session_context — 9 ContextVars that represent
# the active gateway session. If set in one test and not reset,
# the next test's get_session_env() reads stale values.
try:
from gateway import session_context as _sc_mod
for _cv in (
_sc_mod._SESSION_PLATFORM,
_sc_mod._SESSION_CHAT_ID,
_sc_mod._SESSION_CHAT_NAME,
_sc_mod._SESSION_THREAD_ID,
_sc_mod._SESSION_USER_ID,
_sc_mod._SESSION_USER_NAME,
_sc_mod._SESSION_KEY,
_sc_mod._CRON_AUTO_DELIVER_PLATFORM,
_sc_mod._CRON_AUTO_DELIVER_CHAT_ID,
_sc_mod._CRON_AUTO_DELIVER_THREAD_ID,
):
_cv.set(_sc_mod._UNSET)
except Exception:
pass
# --- tools.env_passthrough — ContextVar<set[str]> with no default ---
# LookupError is normal if the test never set it. Setting it to an
# empty set unconditionally normalizes the starting state.
try:
from tools import env_passthrough as _envp_mod
_envp_mod._allowed_env_vars_var.set(set())
except Exception:
pass
# --- tools.credential_files — ContextVar<dict> ---
try:
from tools import credential_files as _credf_mod
_credf_mod._registered_files_var.set({})
except Exception:
pass
# --- tools.file_tools — per-task read history + file-ops cache ---
# _read_tracker accumulates per-task_id read history for loop detection,
# capped by _READ_HISTORY_CAP. If entries from a prior test persist, the
# cap is hit faster than expected and capacity-related tests flake.
try:
from tools import file_tools as _ft_mod
with _ft_mod._read_tracker_lock:
_ft_mod._read_tracker.clear()
with _ft_mod._file_ops_lock:
_ft_mod._file_ops_cache.clear()
except Exception:
pass
yield
@pytest.fixture()
def tmp_dir(tmp_path):
"""Provide a temporary directory that is cleaned up automatically."""

View file

@ -1580,3 +1580,128 @@ class TestParallelTick:
end_s1 = [t for action, jid, t in call_times if action == "end" and jid == "s1"][0]
start_s2 = [t for action, jid, t in call_times if action == "start" and jid == "s2"][0]
assert start_s2 >= end_s1, "Jobs ran concurrently despite max_parallel=1"
class TestDeliverResultTimeoutCancelsFuture:
"""When future.result(timeout=60) raises TimeoutError in the live
adapter delivery path, _deliver_result must cancel the orphan
coroutine so it cannot duplicate-send after the standalone fallback.
"""
def test_live_adapter_timeout_cancels_future_and_falls_back(self):
"""End-to-end: live adapter hangs past the 60s budget, _deliver_result
patches the timeout down to a fast value, confirms future.cancel() fires,
and verifies the standalone fallback path still delivers."""
from gateway.config import Platform
from concurrent.futures import Future
# Live adapter whose send() coroutine never resolves within the budget
adapter = AsyncMock()
adapter.send.return_value = MagicMock(success=True)
pconfig = MagicMock()
pconfig.enabled = True
mock_cfg = MagicMock()
mock_cfg.platforms = {Platform.TELEGRAM: pconfig}
loop = MagicMock()
loop.is_running.return_value = True
# A real concurrent.futures.Future so .cancel() has real semantics,
# but we override .result() to raise TimeoutError exactly like the
# 60s wait firing in production.
captured_future = Future()
cancel_calls = []
original_cancel = captured_future.cancel
def tracking_cancel():
cancel_calls.append(True)
return original_cancel()
captured_future.cancel = tracking_cancel
captured_future.result = MagicMock(side_effect=TimeoutError("timed out"))
def fake_run_coro(coro, _loop):
coro.close()
return captured_future
job = {
"id": "timeout-job",
"deliver": "origin",
"origin": {"platform": "telegram", "chat_id": "123"},
}
standalone_send = AsyncMock(return_value={"success": True})
with patch("gateway.config.load_gateway_config", return_value=mock_cfg), \
patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \
patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro), \
patch("tools.send_message_tool._send_to_platform", new=standalone_send):
result = _deliver_result(
job,
"Hello world",
adapters={Platform.TELEGRAM: adapter},
loop=loop,
)
# 1. The orphan future was cancelled on timeout (the bug fix)
assert cancel_calls == [True], "future.cancel() must fire on TimeoutError"
# 2. The standalone fallback delivered — no double send, no silent drop
assert result is None, f"expected successful delivery, got error: {result!r}"
standalone_send.assert_awaited_once()
class TestSendMediaTimeoutCancelsFuture:
"""Same orphan-coroutine guarantee for _send_media_via_adapter's
future.result(timeout=30) call. If this times out mid-batch, the
in-flight coroutine must be cancelled before the next file is tried.
"""
def test_media_send_timeout_cancels_future_and_continues(self):
"""End-to-end: _send_media_via_adapter with a future whose .result()
raises TimeoutError. Assert cancel() fires and the loop proceeds
to the next file rather than hanging or crashing."""
from concurrent.futures import Future
adapter = MagicMock()
adapter.send_image_file = AsyncMock()
adapter.send_video = AsyncMock()
# First file: future that times out. Second file: future that resolves OK.
timeout_future = Future()
timeout_cancel_calls = []
original_cancel = timeout_future.cancel
def tracking_cancel():
timeout_cancel_calls.append(True)
return original_cancel()
timeout_future.cancel = tracking_cancel
timeout_future.result = MagicMock(side_effect=TimeoutError("timed out"))
ok_future = Future()
ok_future.set_result(MagicMock(success=True))
futures_iter = iter([timeout_future, ok_future])
def fake_run_coro(coro, _loop):
coro.close()
return next(futures_iter)
media_files = [
("/tmp/slow.png", False), # times out
("/tmp/fast.mp4", False), # succeeds
]
loop = MagicMock()
job = {"id": "media-timeout"}
with patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro):
# Should not raise — the except Exception clause swallows the timeout
_send_media_via_adapter(adapter, "chat-1", media_files, None, loop, job)
# 1. The timed-out future was cancelled (the bug fix)
assert timeout_cancel_calls == [True], "future.cancel() must fire on TimeoutError"
# 2. Second file still got dispatched — one timeout doesn't abort the batch
adapter.send_video.assert_called_once()
assert adapter.send_video.call_args[1]["video_path"] == "/tmp/fast.mp4"

View file

@ -355,8 +355,17 @@ async def test_none_user_id_does_not_generate_pairing_code(monkeypatch, tmp_path
async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path):
"""Verify the normal (non-internal) path still triggers pairing for unknown users."""
import gateway.run as gateway_run
import gateway.pairing as pairing_mod
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
# gateway.pairing.PAIRING_DIR is a module-level constant captured at
# import time from whichever HERMES_HOME was set then. Per-test
# HERMES_HOME redirection in conftest doesn't retroactively move it.
# Override directly so pairing rate-limit state lives in this test's
# tmp_path (and so stale state from prior xdist workers can't leak in).
pairing_dir = tmp_path / "pairing"
pairing_dir.mkdir()
monkeypatch.setattr(pairing_mod, "PAIRING_DIR", pairing_dir)
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
# Clear env vars that could let all users through (loaded by

View file

@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
import pytest
from gateway.config import Platform, StreamingConfig
from gateway.platforms.base import resolve_proxy_url
from gateway.run import GatewayRunner
from gateway.session import SessionSource
@ -133,6 +134,15 @@ class TestGetProxyUrl:
assert runner._get_proxy_url() is None
class TestResolveProxyUrl:
def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch):
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
monkeypatch.delenv(key, raising=False)
monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
assert resolve_proxy_url() == "socks5://127.0.0.1:1080/"
class TestRunAgentProxyDispatch:
"""Test that _run_agent() delegates to proxy when configured."""

View file

@ -184,8 +184,15 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p
async def stop(self):
return None
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
# get_running_pid returns 42 before we kill the old gateway, then None
# after remove_pid_file() clears the record (reflects real behavior).
_pid_state = {"alive": True}
def _mock_get_running_pid():
return 42 if _pid_state["alive"] else None
def _mock_remove_pid_file():
_pid_state["alive"] = False
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force)))
monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
@ -253,8 +260,13 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
async def stop(self):
return None
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
_pid_state = {"alive": True}
def _mock_get_running_pid():
return 42 if _pid_state["alive"] else None
def _mock_remove_pid_file():
_pid_state["alive"] = False
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker)
monkeypatch.setattr("gateway.status.terminate_pid", record_terminate)

View file

@ -356,6 +356,28 @@ class TestBuildSessionContextPrompt:
assert "**User:** Alice" in prompt
assert "Multi-user thread" not in prompt
def test_shared_non_thread_group_prompt_hides_single_user(self):
"""Shared non-thread group sessions should avoid pinning one user."""
config = GatewayConfig(
platforms={
Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
},
group_sessions_per_user=False,
)
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="-1002285219667",
chat_name="Test Group",
chat_type="group",
user_name="Alice",
)
ctx = build_session_context(source, config)
prompt = build_session_context_prompt(ctx)
assert "Multi-user session" in prompt
assert "[sender name]" in prompt
assert "**User:** Alice" not in prompt
def test_dm_thread_shows_user_not_multi(self):
"""DM threads are single-user and should show User, not multi-user note."""
config = GatewayConfig(

View file

@ -0,0 +1,70 @@
import pytest
from gateway.config import GatewayConfig, Platform, PlatformConfig
from gateway.platforms.base import MessageEvent
from gateway.run import GatewayRunner
from gateway.session import SessionSource
def _make_runner(config: GatewayConfig) -> GatewayRunner:
runner = object.__new__(GatewayRunner)
runner.config = config
runner.adapters = {}
runner._model = "openai/gpt-4.1-mini"
runner._base_url = None
return runner
@pytest.mark.asyncio
async def test_preprocess_prefixes_sender_for_shared_non_thread_group_session():
runner = _make_runner(
GatewayConfig(
platforms={
Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
},
group_sessions_per_user=False,
)
)
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="-1002285219667",
chat_name="Test Group",
chat_type="group",
user_name="Alice",
)
event = MessageEvent(text="hello", source=source)
result = await runner._prepare_inbound_message_text(
event=event,
source=source,
history=[],
)
assert result == "[Alice] hello"
@pytest.mark.asyncio
async def test_preprocess_keeps_plain_text_for_default_group_sessions():
runner = _make_runner(
GatewayConfig(
platforms={
Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
},
)
)
source = SessionSource(
platform=Platform.TELEGRAM,
chat_id="-1002285219667",
chat_name="Test Group",
chat_type="group",
user_name="Alice",
)
event = MessageEvent(text="hello", source=source)
result = await runner._prepare_inbound_message_text(
event=event,
source=source,
history=[],
)
assert result == "hello"

View file

@ -306,7 +306,13 @@ class TestSignalSessionSource:
class TestSignalPhoneRedaction:
@pytest.fixture(autouse=True)
def _ensure_redaction_enabled(self, monkeypatch):
# agent.redact snapshots _REDACT_ENABLED at import time from the
# HERMES_REDACT_SECRETS env var. monkeypatch.delenv is too late —
# the module was already imported during test collection with
# whatever value was in the env then. Force the flag directly.
# See skill: xdist-cross-test-pollution Pattern 5.
monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)
monkeypatch.setattr("agent.redact._REDACT_ENABLED", True)
def test_us_number(self):
from agent.redact import redact_sensitive_text

View file

@ -19,6 +19,30 @@ class TestGatewayPidState:
assert isinstance(payload["argv"], list)
assert payload["argv"]
def test_write_pid_file_is_atomic_against_concurrent_writers(self, tmp_path, monkeypatch):
"""Regression: two concurrent --replace invocations must not both win.
Without O_CREAT|O_EXCL, two processes racing through start_gateway()'s
termination-wait would both write to gateway.pid, silently overwriting
each other and leaving multiple gateway instances alive (#11718).
"""
import pytest
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
# First write wins.
status.write_pid_file()
assert (tmp_path / "gateway.pid").exists()
# Second write (simulating a racing --replace that missed the earlier
# guards) must raise FileExistsError rather than clobber the record.
with pytest.raises(FileExistsError):
status.write_pid_file()
# Original record is preserved.
payload = json.loads((tmp_path / "gateway.pid").read_text())
assert payload["pid"] == os.getpid()
def test_get_running_pid_rejects_live_non_gateway_pid(self, tmp_path, monkeypatch):
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
pid_path = tmp_path / "gateway.pid"

View file

@ -71,7 +71,17 @@ def test_group_messages_can_require_direct_trigger_via_config():
assert adapter._should_process_message(_group_message("hello everyone")) is False
assert adapter._should_process_message(_group_message("hi @hermes_bot", entities=[_mention_entity("hi @hermes_bot")])) is True
assert adapter._should_process_message(_group_message("replying", reply_to_bot=True)) is True
assert adapter._should_process_message(_group_message("/status"), is_command=True) is True
# Commands must also respect require_mention when it is enabled
assert adapter._should_process_message(_group_message("/status"), is_command=True) is False
# But commands with @mention still pass (Telegram emits a MENTION entity
# for /cmd@botname — the bot menu and python-telegram-bot's CommandHandler
# rely on this same mechanism)
assert adapter._should_process_message(
_group_message("/status@hermes_bot", entities=[_mention_entity("/status@hermes_bot")])
) is True
# And commands still pass unconditionally when require_mention is disabled
adapter_no_mention = _make_adapter(require_mention=False)
assert adapter_no_mention._should_process_message(_group_message("/status"), is_command=True) is True
def test_free_response_chats_bypass_mention_requirement():

View file

@ -0,0 +1,100 @@
"""Tests for GHSA-3vpc-7q5r-276h — Telegram webhook secret required.
Previously, when TELEGRAM_WEBHOOK_URL was set but TELEGRAM_WEBHOOK_SECRET
was not, python-telegram-bot received secret_token=None and the webhook
endpoint accepted any HTTP POST.
The fix refuses to start the adapter in webhook mode without the secret.
"""
from __future__ import annotations
import re
import sys
from pathlib import Path
import pytest
_repo = str(Path(__file__).resolve().parents[2])
if _repo not in sys.path:
sys.path.insert(0, _repo)
class TestTelegramWebhookSecretRequired:
"""Direct source-level check of the webhook-secret guard.
The guard is embedded in TelegramAdapter.connect() and hard to isolate
via mocks (requires a full python-telegram-bot ApplicationBuilder
chain). These tests exercise it via source inspection verifying the
check exists, raises RuntimeError with the advisory link, and only
fires in webhook mode. End-to-end validation is covered by CI +
manual deployment tests.
"""
def _get_source(self) -> str:
path = Path(_repo) / "gateway" / "platforms" / "telegram.py"
return path.read_text(encoding="utf-8")
def test_webhook_branch_checks_secret(self):
"""The webhook-mode branch of connect() must read
TELEGRAM_WEBHOOK_SECRET and refuse when empty."""
src = self._get_source()
# The guard must appear after TELEGRAM_WEBHOOK_URL is set
assert re.search(
r'TELEGRAM_WEBHOOK_SECRET.*?\.strip\(\)\s*\n\s*if not webhook_secret:',
src, re.DOTALL,
), (
"TelegramAdapter.connect() must strip TELEGRAM_WEBHOOK_SECRET "
"and raise when the secret is empty — see GHSA-3vpc-7q5r-276h"
)
def test_guard_raises_runtime_error(self):
"""The guard raises RuntimeError (not a silent log) so operators
see the failure at startup."""
src = self._get_source()
# Between the "if not webhook_secret:" line and the next blank
# line block, we should see a RuntimeError being raised
guard_match = re.search(
r'if not webhook_secret:\s*\n\s*raise\s+RuntimeError\(',
src,
)
assert guard_match, (
"Missing webhook secret must raise RuntimeError — silent "
"fall-through was the original GHSA-3vpc-7q5r-276h bypass"
)
def test_guard_message_includes_advisory_link(self):
"""The RuntimeError message should reference the advisory so
operators can read the full context."""
src = self._get_source()
assert "GHSA-3vpc-7q5r-276h" in src, (
"Guard error message must cite the advisory for operator context"
)
def test_guard_message_explains_remediation(self):
"""The error should tell the operator how to fix it."""
src = self._get_source()
# Should mention how to generate a secret
assert "openssl rand" in src or "TELEGRAM_WEBHOOK_SECRET=" in src, (
"Guard error message should show operators how to set "
"TELEGRAM_WEBHOOK_SECRET"
)
def test_polling_branch_has_no_secret_guard(self):
"""Polling mode (else-branch) must NOT require the webhook secret —
polling authenticates via the bot token, not a webhook secret."""
src = self._get_source()
# The guard should appear inside the `if webhook_url:` branch,
# not the `else:` polling branch. Rough check: the raise is
# followed (within ~60 lines) by an `else:` that starts the
# polling branch, and there's no secret-check in that polling
# branch.
webhook_block = re.search(
r'if webhook_url:\s*\n(.*?)\n else:\s*\n(.*?)\n',
src, re.DOTALL,
)
if webhook_block:
webhook_body = webhook_block.group(1)
polling_body = webhook_block.group(2)
assert "TELEGRAM_WEBHOOK_SECRET" in webhook_body
assert "TELEGRAM_WEBHOOK_SECRET" not in polling_body

View file

@ -175,3 +175,79 @@ class TestUsageCachedAgent:
result = await runner._handle_usage_command(event)
assert "Cost: included" in result
class TestUsageAccountSection:
"""Account-limits section appended to /usage output (PR #2486)."""
@pytest.mark.asyncio
async def test_usage_command_includes_account_section(self, monkeypatch):
agent = _make_mock_agent(provider="openai-codex")
agent.base_url = "https://chatgpt.com/backend-api/codex"
agent.api_key = "unused"
runner = _make_runner(SK, cached_agent=agent)
event = MagicMock()
monkeypatch.setattr(
"gateway.run.fetch_account_usage",
lambda provider, base_url=None, api_key=None: object(),
)
monkeypatch.setattr(
"gateway.run.render_account_usage_lines",
lambda snapshot, markdown=False: [
"📈 **Account limits**",
"Provider: openai-codex (Pro)",
"Session: 85% remaining (15% used)",
],
)
with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"), \
patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
mock_cost.return_value = MagicMock(amount_usd=None, status="included")
result = await runner._handle_usage_command(event)
assert "📊 **Session Token Usage**" in result
assert "📈 **Account limits**" in result
assert "Provider: openai-codex (Pro)" in result
@pytest.mark.asyncio
async def test_usage_command_uses_persisted_provider_when_agent_not_running(self, monkeypatch):
runner = _make_runner(SK)
runner._session_db = MagicMock()
runner._session_db.get_session.return_value = {
"billing_provider": "openai-codex",
"billing_base_url": "https://chatgpt.com/backend-api/codex",
}
session_entry = MagicMock()
session_entry.session_id = "sess-1"
runner.session_store.get_or_create_session.return_value = session_entry
runner.session_store.load_transcript.return_value = [
{"role": "user", "content": "earlier"},
]
calls = {}
async def _fake_to_thread(fn, *args, **kwargs):
calls["args"] = args
calls["kwargs"] = kwargs
return fn(*args, **kwargs)
monkeypatch.setattr("gateway.run.asyncio.to_thread", _fake_to_thread)
monkeypatch.setattr(
"gateway.run.fetch_account_usage",
lambda provider, base_url=None, api_key=None: object(),
)
monkeypatch.setattr(
"gateway.run.render_account_usage_lines",
lambda snapshot, markdown=False: [
"📈 **Account limits**",
"Provider: openai-codex (Pro)",
],
)
event = MagicMock()
result = await runner._handle_usage_command(event)
assert calls["args"] == ("openai-codex",)
assert calls["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
assert "📊 **Session Info**" in result
assert "📈 **Account limits**" in result

View file

@ -921,17 +921,13 @@ class TestKimiMoonshotModelListIsolation:
leaked = set(moonshot_models) & coding_plan_only
assert not leaked, f"Moonshot list contains Coding Plan-only models: {leaked}"
def test_moonshot_list_contains_shared_models(self):
def test_moonshot_list_non_empty(self):
from hermes_cli.main import _PROVIDER_MODELS
moonshot_models = _PROVIDER_MODELS["moonshot"]
assert "kimi-k2.5" in moonshot_models
assert "kimi-k2-thinking" in moonshot_models
assert len(_PROVIDER_MODELS["moonshot"]) >= 1
def test_coding_plan_list_contains_plan_specific_models(self):
def test_coding_plan_list_non_empty(self):
from hermes_cli.main import _PROVIDER_MODELS
coding_models = _PROVIDER_MODELS["kimi-coding"]
assert "kimi-for-coding" in coding_models
assert "kimi-k2-thinking-turbo" in coding_models
assert len(_PROVIDER_MODELS["kimi-coding"]) >= 1
# =============================================================================
@ -944,14 +940,12 @@ class TestHuggingFaceModels:
def test_main_provider_models_has_huggingface(self):
from hermes_cli.main import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 6, "Expected at least 6 curated HF models"
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
def test_models_py_has_huggingface(self):
from hermes_cli.models import _PROVIDER_MODELS
assert "huggingface" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["huggingface"]
assert len(models) >= 6
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
def test_model_lists_match(self):
"""Model lists in main.py and models.py should be identical."""

View file

@ -115,12 +115,12 @@ class TestArceeCredentials:
class TestArceeModelCatalog:
def test_static_model_list(self):
"""Arcee has a static _PROVIDER_MODELS catalog entry. Specific model
names change with releases and don't belong in tests.
"""
from hermes_cli.models import _PROVIDER_MODELS
assert "arcee" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["arcee"]
assert "trinity-large-thinking" in models
assert "trinity-large-preview" in models
assert "trinity-mini" in models
assert len(_PROVIDER_MODELS["arcee"]) >= 1
def test_canonical_provider_entry(self):
from hermes_cli.models import CANONICAL_PROVIDERS

View file

@ -1011,3 +1011,466 @@ def test_seed_from_singletons_respects_codex_suppression(tmp_path, monkeypatch):
# Verify the auth store was NOT modified (no auto-import happened)
after = json.loads((hermes_home / "auth.json").read_text())
assert "openai-codex" not in after.get("providers", {})
def test_auth_remove_env_seeded_suppresses_shell_exported_var(tmp_path, monkeypatch, capsys):
"""`hermes auth remove xai 1` must stick even when the env var is exported
by the shell (not written into ~/.hermes/.env). Before PR for #13371 the
removal silently restored on next load_pool() because _seed_from_env()
re-read os.environ. Now env:<VAR> is suppressed in auth.json.
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
# Simulate shell export (NOT written to .env)
monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
(hermes_home / ".env").write_text("")
_write_auth_store(
tmp_path,
{
"version": 1,
"credential_pool": {
"xai": [{
"id": "env-1",
"label": "XAI_API_KEY",
"auth_type": "api_key",
"priority": 0,
"source": "env:XAI_API_KEY",
"access_token": "sk-xai-shell-export",
"base_url": "https://api.x.ai/v1",
}]
},
},
)
from types import SimpleNamespace
from hermes_cli.auth_commands import auth_remove_command
auth_remove_command(SimpleNamespace(provider="xai", target="1"))
# Suppression marker written
after = json.loads((hermes_home / "auth.json").read_text())
assert "env:XAI_API_KEY" in after.get("suppressed_sources", {}).get("xai", [])
# Diagnostic printed pointing at the shell
out = capsys.readouterr().out
assert "still set in your shell environment" in out
assert "Cleared XAI_API_KEY from .env" not in out # wasn't in .env
# Fresh simulation: shell re-exports, reload pool
monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
from agent.credential_pool import load_pool
pool = load_pool("xai")
assert not pool.has_credentials(), "pool must stay empty — env:XAI_API_KEY suppressed"
def test_auth_remove_env_seeded_dotenv_only_no_shell_hint(tmp_path, monkeypatch, capsys):
"""When the env var lives only in ~/.hermes/.env (not the shell), the
shell-hint should NOT be printed avoid scaring the user about a
non-existent shell export.
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
# Key ONLY in .env, shell must not have it
monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False)
(hermes_home / ".env").write_text("DEEPSEEK_API_KEY=sk-ds-only\n")
# Mimic load_env() populating os.environ
monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-ds-only")
_write_auth_store(
tmp_path,
{
"version": 1,
"credential_pool": {
"deepseek": [{
"id": "env-1",
"label": "DEEPSEEK_API_KEY",
"auth_type": "api_key",
"priority": 0,
"source": "env:DEEPSEEK_API_KEY",
"access_token": "sk-ds-only",
}]
},
},
)
from types import SimpleNamespace
from hermes_cli.auth_commands import auth_remove_command
auth_remove_command(SimpleNamespace(provider="deepseek", target="1"))
out = capsys.readouterr().out
assert "Cleared DEEPSEEK_API_KEY from .env" in out
assert "still set in your shell environment" not in out
assert (hermes_home / ".env").read_text().strip() == ""
def test_auth_add_clears_env_suppression_for_provider(tmp_path, monkeypatch):
"""Re-adding a credential via `hermes auth add <provider>` clears any
env:<VAR> suppression marker strong signal the user wants auth back.
Matches the Codex device_code re-link behaviour.
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.delenv("XAI_API_KEY", raising=False)
_write_auth_store(
tmp_path,
{
"version": 1,
"providers": {},
"suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
},
)
from types import SimpleNamespace
from hermes_cli.auth import is_source_suppressed
from hermes_cli.auth_commands import auth_add_command
assert is_source_suppressed("xai", "env:XAI_API_KEY") is True
auth_add_command(SimpleNamespace(
provider="xai", auth_type="api_key",
api_key="sk-xai-manual", label="manual",
))
assert is_source_suppressed("xai", "env:XAI_API_KEY") is False
def test_seed_from_env_respects_env_suppression(tmp_path, monkeypatch):
"""_seed_from_env() must skip env:<VAR> sources that the user suppressed
via `hermes auth remove`. This is the gate that prevents shell-exported
keys from resurrecting removed credentials.
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")
(hermes_home / "auth.json").write_text(json.dumps({
"version": 1,
"providers": {},
"suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
}))
from agent.credential_pool import _seed_from_env
entries = []
changed, active = _seed_from_env("xai", entries)
assert changed is False
assert entries == []
assert active == set()
def test_seed_from_env_respects_openrouter_suppression(tmp_path, monkeypatch):
"""OpenRouter is the special-case branch in _seed_from_env; verify it
honours suppression too.
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-shell-export")
(hermes_home / "auth.json").write_text(json.dumps({
"version": 1,
"providers": {},
"suppressed_sources": {"openrouter": ["env:OPENROUTER_API_KEY"]},
}))
from agent.credential_pool import _seed_from_env
entries = []
changed, active = _seed_from_env("openrouter", entries)
assert changed is False
assert entries == []
assert active == set()
# =============================================================================
# Unified credential-source stickiness — every source Hermes reads from has a
# registered RemovalStep in agent.credential_sources, and every seeding path
# gates on is_source_suppressed. Below: one test per source proving remove
# sticks across a fresh load_pool() call.
# =============================================================================
def test_seed_from_singletons_respects_nous_suppression(tmp_path, monkeypatch):
"""nous device_code must not re-seed from auth.json when suppressed."""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
(hermes_home / "auth.json").write_text(json.dumps({
"version": 1,
"providers": {"nous": {"access_token": "tok", "refresh_token": "r", "expires_at": 9999999999}},
"suppressed_sources": {"nous": ["device_code"]},
}))
from agent.credential_pool import _seed_from_singletons
entries = []
changed, active = _seed_from_singletons("nous", entries)
assert changed is False
assert entries == []
assert active == set()
def test_seed_from_singletons_respects_copilot_suppression(tmp_path, monkeypatch):
"""copilot gh_cli must not re-seed when suppressed."""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
(hermes_home / "auth.json").write_text(json.dumps({
"version": 1,
"providers": {},
"suppressed_sources": {"copilot": ["gh_cli"]},
}))
# Stub resolve_copilot_token to return a live token
import hermes_cli.copilot_auth as ca
monkeypatch.setattr(ca, "resolve_copilot_token", lambda: ("ghp_fake", "gh auth token"))
from agent.credential_pool import _seed_from_singletons
entries = []
changed, active = _seed_from_singletons("copilot", entries)
assert changed is False
assert entries == []
assert active == set()
def test_seed_from_singletons_respects_qwen_suppression(tmp_path, monkeypatch):
"""qwen-oauth qwen-cli must not re-seed from ~/.qwen/oauth_creds.json when suppressed."""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
(hermes_home / "auth.json").write_text(json.dumps({
"version": 1,
"providers": {},
"suppressed_sources": {"qwen-oauth": ["qwen-cli"]},
}))
import hermes_cli.auth as ha
monkeypatch.setattr(ha, "resolve_qwen_runtime_credentials", lambda **kw: {
"api_key": "tok", "source": "qwen-cli", "base_url": "https://q",
})
from agent.credential_pool import _seed_from_singletons
entries = []
changed, active = _seed_from_singletons("qwen-oauth", entries)
assert changed is False
assert entries == []
assert active == set()
def test_seed_from_singletons_respects_hermes_pkce_suppression(tmp_path, monkeypatch):
"""anthropic hermes_pkce must not re-seed from ~/.hermes/.anthropic_oauth.json when suppressed."""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
import yaml
(hermes_home / "config.yaml").write_text(yaml.dump({"model": {"provider": "anthropic", "model": "claude"}}))
(hermes_home / "auth.json").write_text(json.dumps({
"version": 1,
"providers": {},
"suppressed_sources": {"anthropic": ["hermes_pkce"]},
}))
# Stub the readers so only hermes_pkce is "available"; claude_code returns None
import agent.anthropic_adapter as aa
monkeypatch.setattr(aa, "read_hermes_oauth_credentials", lambda: {
"accessToken": "tok", "refreshToken": "r", "expiresAt": 9999999999000,
})
monkeypatch.setattr(aa, "read_claude_code_credentials", lambda: None)
from agent.credential_pool import _seed_from_singletons
entries = []
changed, active = _seed_from_singletons("anthropic", entries)
# hermes_pkce suppressed, claude_code returns None → nothing should be seeded
assert entries == []
assert "hermes_pkce" not in active
def test_seed_custom_pool_respects_config_suppression(tmp_path, monkeypatch):
"""Custom provider config:<name> source must not re-seed when suppressed."""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
import yaml
(hermes_home / "config.yaml").write_text(yaml.dump({
"model": {},
"custom_providers": [
{"name": "my", "base_url": "https://c.example.com", "api_key": "sk-custom"},
],
}))
from agent.credential_pool import _seed_custom_pool, get_custom_provider_pool_key
pool_key = get_custom_provider_pool_key("https://c.example.com")
(hermes_home / "auth.json").write_text(json.dumps({
"version": 1,
"providers": {},
"suppressed_sources": {pool_key: ["config:my"]},
}))
entries = []
changed, active = _seed_custom_pool(pool_key, entries)
assert changed is False
assert entries == []
assert "config:my" not in active
def test_credential_sources_registry_has_expected_steps():
"""Sanity check — the registry contains the expected RemovalSteps.
Guards against accidentally dropping a step during future refactors.
If you add a new credential source, add it to the expected set below.
"""
from agent.credential_sources import _REGISTRY
descriptions = {step.description for step in _REGISTRY}
expected = {
"gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
"Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
"~/.claude/.credentials.json",
"~/.hermes/.anthropic_oauth.json",
"auth.json providers.nous",
"auth.json providers.openai-codex + ~/.codex/auth.json",
"~/.qwen/oauth_creds.json",
"Custom provider config.yaml api_key field",
}
assert descriptions == expected, f"Registry mismatch. Got: {descriptions}"
def test_credential_sources_find_step_returns_none_for_manual():
"""Manual entries have nothing external to clean up — no step registered."""
from agent.credential_sources import find_removal_step
assert find_removal_step("openrouter", "manual") is None
assert find_removal_step("xai", "manual") is None
def test_credential_sources_find_step_copilot_before_generic_env(tmp_path, monkeypatch):
"""copilot env:GH_TOKEN must dispatch to the copilot step, not the
generic env-var step. The copilot step handles the duplicate-source
problem (same token seeded as both gh_cli and env:<VAR>); the generic
env step would only suppress one of the variants.
"""
from agent.credential_sources import find_removal_step
step = find_removal_step("copilot", "env:GH_TOKEN")
assert step is not None
assert "copilot" in step.description.lower() or "gh" in step.description.lower()
# Generic step still matches any other provider's env var
step = find_removal_step("xai", "env:XAI_API_KEY")
assert step is not None
assert "env-seeded" in step.description.lower()
def test_auth_remove_copilot_suppresses_all_variants(tmp_path, monkeypatch):
"""Removing any copilot source must suppress gh_cli + all env:* variants
so the duplicate-seed paths don't resurrect the credential.
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
_write_auth_store(
tmp_path,
{
"version": 1,
"credential_pool": {
"copilot": [{
"id": "c1",
"label": "gh auth token",
"auth_type": "api_key",
"priority": 0,
"source": "gh_cli",
"access_token": "ghp_fake",
}]
},
},
)
from types import SimpleNamespace
from hermes_cli.auth import is_source_suppressed
from hermes_cli.auth_commands import auth_remove_command
auth_remove_command(SimpleNamespace(provider="copilot", target="1"))
assert is_source_suppressed("copilot", "gh_cli")
assert is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN")
assert is_source_suppressed("copilot", "env:GH_TOKEN")
assert is_source_suppressed("copilot", "env:GITHUB_TOKEN")
def test_auth_add_clears_all_suppressions_including_non_env(tmp_path, monkeypatch):
"""Re-adding a credential via `hermes auth add <provider>` clears ALL
suppression markers for the provider, not just env:*. This matches
the single "re-engage" semantic the user wants auth back, period.
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
_write_auth_store(
tmp_path,
{
"version": 1,
"providers": {},
"suppressed_sources": {
"copilot": ["gh_cli", "env:GH_TOKEN", "env:COPILOT_GITHUB_TOKEN"],
},
},
)
from types import SimpleNamespace
from hermes_cli.auth import is_source_suppressed
from hermes_cli.auth_commands import auth_add_command
auth_add_command(SimpleNamespace(
provider="copilot", auth_type="api_key",
api_key="ghp-manual", label="m",
))
assert not is_source_suppressed("copilot", "gh_cli")
assert not is_source_suppressed("copilot", "env:GH_TOKEN")
assert not is_source_suppressed("copilot", "env:COPILOT_GITHUB_TOKEN")
def test_auth_remove_codex_manual_device_code_suppresses_canonical(tmp_path, monkeypatch):
"""Removing a manual:device_code entry (from `hermes auth add openai-codex`)
must suppress the canonical ``device_code`` key, not ``manual:device_code``.
The re-seed gate in _seed_from_singletons checks ``device_code``.
"""
hermes_home = tmp_path / "hermes"
hermes_home.mkdir(parents=True, exist_ok=True)
monkeypatch.setenv("HERMES_HOME", str(hermes_home))
_write_auth_store(
tmp_path,
{
"version": 1,
"providers": {"openai-codex": {"tokens": {"access_token": "t", "refresh_token": "r"}}},
"credential_pool": {
"openai-codex": [{
"id": "cdx",
"label": "manual-codex",
"auth_type": "oauth",
"priority": 0,
"source": "manual:device_code",
"access_token": "t",
}]
},
},
)
from types import SimpleNamespace
from hermes_cli.auth import is_source_suppressed
from hermes_cli.auth_commands import auth_remove_command
auth_remove_command(SimpleNamespace(provider="openai-codex", target="1"))
assert is_source_suppressed("openai-codex", "device_code")

View file

@ -459,7 +459,8 @@ class TestCustomProviderCompatibility:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 21
from hermes_cli.config import DEFAULT_CONFIG
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["providers"]["openai-direct"] == {
"api": "https://api.openai.com/v1",
"api_key": "test-key",
@ -501,7 +502,8 @@ class TestCustomProviderCompatibility:
assert compatible[0]["provider_key"] == "openai-direct"
assert compatible[0]["api_mode"] == "codex_responses"
def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path):
def test_compatible_custom_providers_prefers_base_url_then_url_then_api(self, tmp_path):
"""URL field precedence is base_url > url > api (PR #9332)."""
config_path = tmp_path / "config.yaml"
config_path.write_text(
yaml.safe_dump(
@ -526,7 +528,7 @@ class TestCustomProviderCompatibility:
assert compatible == [
{
"name": "My Provider",
"base_url": "https://api.example.com/v1",
"base_url": "https://base.example.com/v1",
"provider_key": "my-provider",
}
]
@ -606,7 +608,8 @@ class TestInterimAssistantMessageConfig:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 21
from hermes_cli.config import DEFAULT_CONFIG
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["display"]["tool_progress"] == "off"
assert raw["display"]["interim_assistant_messages"] is True
@ -626,7 +629,8 @@ class TestDiscordChannelPromptsConfig:
migrate_config(interactive=False, quiet=True)
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
assert raw["_config_version"] == 21
from hermes_cli.config import DEFAULT_CONFIG
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
assert raw["discord"]["auto_thread"] is True
assert raw["discord"]["channel_prompts"] == {}

View file

@ -125,18 +125,12 @@ class TestGeminiCredentials:
# ── Model Catalog ──
class TestGeminiModelCatalog:
def test_provider_models_exist(self):
def test_provider_entry_exists(self):
"""Gemini provider has a model catalog entry. Specific model names
are data that changes with Google releases and don't belong in tests.
"""
assert "gemini" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["gemini"]
assert "gemini-2.5-pro" in models
assert "gemini-2.5-flash" in models
assert "gemma-4-31b-it" not in models
def test_provider_models_has_3x(self):
models = _PROVIDER_MODELS["gemini"]
assert "gemini-3.1-pro-preview" in models
assert "gemini-3-flash-preview" in models
assert "gemini-3.1-flash-lite-preview" in models
assert len(_PROVIDER_MODELS["gemini"]) >= 1
def test_provider_label(self):
assert "gemini" in _PROVIDER_LABELS

View file

@ -457,29 +457,62 @@ class TestValidateApiNotFound:
assert "not found" in result["message"]
# -- validate — API unreachable — reject with guidance ----------------
# -- validate — API unreachable — soft-accept via catalog or warning --------
class TestValidateApiFallback:
def test_any_model_rejected_when_api_down(self):
result = _validate("anthropic/claude-opus-4.6", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
"""When /models is unreachable, the validator must accept the model (with
a warning) rather than reject it outright otherwise provider switches
fail in the gateway for any provider whose /models endpoint is down or
doesn't exist (e.g. opencode-go returns 404 HTML).
def test_unknown_model_also_rejected_when_api_down(self):
result = _validate("anthropic/claude-next-gen", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
assert "could not reach" in result["message"].lower()
Two paths:
1. Provider has a curated catalog (``_PROVIDER_MODELS`` / live fetch):
validate against it (recognized=True for known models,
recognized=False with 'Note:' for unknown).
2. Provider has no catalog: accept with a generic 'Note:' warning.
def test_zai_model_rejected_when_api_down(self):
In both cases ``accepted`` and ``persist`` must be True so the gateway can
write the ``_session_model_overrides`` entry.
"""
def test_known_model_accepted_via_catalog_when_api_down(self):
# Force the openrouter catalog lookup to return a deterministic list.
with patch(
"hermes_cli.models.provider_model_ids",
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
):
result = _validate("anthropic/claude-opus-4.6", api_models=None)
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is True
def test_unknown_model_accepted_with_note_when_api_down(self):
with patch(
"hermes_cli.models.provider_model_ids",
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
):
result = _validate("anthropic/claude-next-gen", api_models=None)
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is False
# Message flags it as unverified against the catalog.
assert "not found" in result["message"].lower() or "note" in result["message"].lower()
def test_zai_known_model_accepted_via_catalog_when_api_down(self):
# glm-5 is in the zai curated catalog (_PROVIDER_MODELS["zai"]).
result = _validate("glm-5", provider="zai", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is True
def test_unknown_provider_rejected_when_api_down(self):
result = _validate("some-model", provider="totally-unknown", api_models=None)
assert result["accepted"] is False
assert result["persist"] is False
def test_unknown_provider_soft_accepted_when_api_down(self):
# No catalog for unknown providers — soft-accept with a Note.
with patch("hermes_cli.models.provider_model_ids", return_value=[]):
result = _validate("some-model", provider="totally-unknown", api_models=None)
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is False
assert "note" in result["message"].lower()
def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self):
with patch(

View file

@ -88,6 +88,131 @@ class TestFetchOpenRouterModels:
assert models == OPENROUTER_MODELS
def test_filters_out_models_without_tool_support(self, monkeypatch):
"""Models whose supported_parameters omits 'tools' must not appear in the picker.
hermes-agent is tool-calling-first surfacing a non-tool model leads to
immediate runtime failures when the user selects it. Ported from
Kilo-Org/kilocode#9068.
"""
class _Resp:
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def read(self):
# opus-4.6 advertises tools → kept
# nano-image has explicit supported_parameters that OMITS tools → dropped
# qwen3.6-plus advertises tools → kept
return (
b'{"data":['
b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"},'
b'"supported_parameters":["temperature","tools","tool_choice"]},'
b'{"id":"google/gemini-3-pro-image-preview","pricing":{"prompt":"0.00001","completion":"0.00003"},'
b'"supported_parameters":["temperature","response_format"]},'
b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"},'
b'"supported_parameters":["tools","temperature"]}'
b']}'
)
# Include the image-only id in the curated list so it has a chance to be surfaced.
monkeypatch.setattr(
_models_mod,
"OPENROUTER_MODELS",
[
("anthropic/claude-opus-4.6", ""),
("google/gemini-3-pro-image-preview", ""),
("qwen/qwen3.6-plus", ""),
],
)
monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)
with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()):
models = fetch_openrouter_models(force_refresh=True)
ids = [mid for mid, _ in models]
assert "anthropic/claude-opus-4.6" in ids
assert "qwen/qwen3.6-plus" in ids
# Image-only model advertised supported_parameters WITHOUT tools → must be dropped.
assert "google/gemini-3-pro-image-preview" not in ids
def test_permissive_when_supported_parameters_missing(self, monkeypatch):
"""Models missing the supported_parameters field keep appearing in the picker.
Some OpenRouter-compatible gateways (Nous Portal, private mirrors, older
catalog snapshots) don't populate supported_parameters. Treating missing
as 'unknown → allow' prevents the picker from silently emptying on
those gateways.
"""
class _Resp:
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def read(self):
# No supported_parameters field at all on either entry.
return (
b'{"data":['
b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"}},'
b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"}}'
b']}'
)
monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)
with patch("hermes_cli.models.urllib.request.urlopen", return_value=_Resp()):
models = fetch_openrouter_models(force_refresh=True)
ids = [mid for mid, _ in models]
assert "anthropic/claude-opus-4.6" in ids
assert "qwen/qwen3.6-plus" in ids
class TestOpenRouterToolSupportHelper:
"""Unit tests for _openrouter_model_supports_tools (Kilo port #9068)."""
def test_tools_in_supported_parameters(self):
from hermes_cli.models import _openrouter_model_supports_tools
assert _openrouter_model_supports_tools(
{"id": "x", "supported_parameters": ["temperature", "tools"]}
) is True
def test_tools_missing_from_supported_parameters(self):
from hermes_cli.models import _openrouter_model_supports_tools
assert _openrouter_model_supports_tools(
{"id": "x", "supported_parameters": ["temperature", "response_format"]}
) is False
def test_supported_parameters_absent_is_permissive(self):
"""Missing field → allow (so older / non-OR gateways still work)."""
from hermes_cli.models import _openrouter_model_supports_tools
assert _openrouter_model_supports_tools({"id": "x"}) is True
def test_supported_parameters_none_is_permissive(self):
from hermes_cli.models import _openrouter_model_supports_tools
assert _openrouter_model_supports_tools({"id": "x", "supported_parameters": None}) is True
def test_supported_parameters_malformed_is_permissive(self):
"""Malformed (non-list) value → allow rather than silently drop."""
from hermes_cli.models import _openrouter_model_supports_tools
assert _openrouter_model_supports_tools(
{"id": "x", "supported_parameters": "tools,temperature"}
) is True
def test_non_dict_item_is_permissive(self):
from hermes_cli.models import _openrouter_model_supports_tools
assert _openrouter_model_supports_tools(None) is True
assert _openrouter_model_supports_tools("anthropic/claude-opus-4.6") is True
def test_empty_supported_parameters_list_drops_model(self):
"""Explicit empty list → no tools → drop."""
from hermes_cli.models import _openrouter_model_supports_tools
assert _openrouter_model_supports_tools(
{"id": "x", "supported_parameters": []}
) is False
class TestFindOpenrouterSlug:
def test_exact_match(self):

View file

@ -15,7 +15,7 @@ def test_opencode_go_appears_when_api_key_set():
opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None)
assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set"
assert opencode_go["models"] == ["kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
assert opencode_go["models"] == ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
# opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when
# models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when
# the API is unavailable, e.g. in CI).

View file

@ -0,0 +1,133 @@
"""Tests for the static-catalog fallback in validate_requested_model.
OpenCode Go and OpenCode Zen publish an OpenAI-compatible API at paths that do
NOT expose ``/models`` (the path returns the marketing site's HTML 404). This
caused ``validate_requested_model`` to return ``accepted=False`` for every
model on those providers, which in turn made ``switch_model()`` fail and the
gateway's ``/model <name> --provider opencode-go`` command never write to
``_session_model_overrides``.
These tests cover the catalog-fallback path: when ``fetch_api_models`` returns
``None``, the validator must consult ``provider_model_ids()`` for the provider
(populated from ``_PROVIDER_MODELS``) rather than rejecting outright.
"""
from unittest.mock import patch
from hermes_cli.models import validate_requested_model
_UNREACHABLE_PROBE = {
"models": None,
"probed_url": "https://opencode.ai/zen/go/v1/models",
"resolved_base_url": "https://opencode.ai/zen/go/v1",
"suggested_base_url": None,
"used_fallback": False,
}
def _patched(func):
"""Decorator: force fetch_api_models / probe_api_models to simulate an
unreachable /models endpoint, proving the catalog path is used."""
def wrapper(*args, **kwargs):
with patch("hermes_cli.models.fetch_api_models", return_value=None), \
patch("hermes_cli.models.probe_api_models", return_value=_UNREACHABLE_PROBE):
return func(*args, **kwargs)
wrapper.__name__ = func.__name__
return wrapper
# ---------------------------------------------------------------------------
# opencode-go: curated catalog in _PROVIDER_MODELS
# ---------------------------------------------------------------------------
@_patched
def test_opencode_go_known_model_accepted():
"""A model present in the opencode-go curated catalog must be accepted
even when /models is unreachable."""
result = validate_requested_model("kimi-k2.6", "opencode-go")
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is True
assert result["message"] is None
@_patched
def test_opencode_go_known_model_case_insensitive():
"""Catalog lookup is case-insensitive."""
result = validate_requested_model("KIMI-K2.6", "opencode-go")
assert result["accepted"] is True
assert result["recognized"] is True
@_patched
def test_opencode_go_typo_auto_corrected():
"""A close typo (>= 0.9 similarity) is auto-corrected to the catalog
entry."""
# 'kimi-k2.55' vs 'kimi-k2.5' ratio ≈ 0.95 — within the 0.9 cutoff.
result = validate_requested_model("kimi-k2.55", "opencode-go")
assert result["accepted"] is True
assert result["recognized"] is True
assert result.get("corrected_model") == "kimi-k2.5"
@_patched
def test_opencode_go_unknown_model_accepted_with_suggestion():
"""An unknown model that has a medium-similarity match (>= 0.5 but < 0.9)
is accepted with recognized=False and a 'similar models' hint. The key
invariant: the gateway MUST be able to persist this override, so
accepted/persist must both be True."""
# 'kimi-k3-preview' vs 'kimi-k2.6' — similar enough to suggest, not to auto-correct.
result = validate_requested_model("kimi-k3-preview", "opencode-go")
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is False
assert "kimi-k3-preview" in result["message"]
assert "curated catalog" in result["message"]
@_patched
def test_opencode_go_totally_unknown_model_still_accepted():
"""A model with zero similarity to the catalog is still accepted (no
suggestion line) so the user can try a model that hasn't made it into the
curated list yet."""
result = validate_requested_model("some-brand-new-model", "opencode-go")
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is False
# No suggestion text (no close matches)
assert "Similar models" not in result["message"]
assert "opencode" in result["message"].lower() or "opencode go" in result["message"].lower()
# ---------------------------------------------------------------------------
# opencode-zen: same pattern as opencode-go
# ---------------------------------------------------------------------------
@_patched
def test_opencode_zen_known_model_accepted():
"""opencode-zen also uses _PROVIDER_MODELS; kimi-k2 is in its catalog."""
result = validate_requested_model("kimi-k2", "opencode-zen")
assert result["accepted"] is True
assert result["recognized"] is True
# ---------------------------------------------------------------------------
# Unknown provider with no catalog: soft-accept (honors the comment's intent)
# ---------------------------------------------------------------------------
@_patched
def test_provider_without_catalog_accepts_with_warning():
"""When a provider has no entry in _PROVIDER_MODELS and /models is
unreachable, accept the model with a 'Note:' warning rather than reject.
This matches the in-code comment: 'Accept and persist, but warn so typos
don't silently break things.'"""
# Use a made-up provider name that won't resolve to any catalog.
result = validate_requested_model("some-model", "provider-that-does-not-exist")
assert result["accepted"] is True
assert result["persist"] is True
assert result["recognized"] is False
assert "Note:" in result["message"]

View file

@ -1412,3 +1412,90 @@ def test_named_custom_runtime_no_model_when_absent(monkeypatch):
resolved = rp.resolve_runtime_provider(requested="my-server")
assert "model" not in resolved
# ---------------------------------------------------------------------------
# GHSA-76xc-57q6-vm5m — Ollama URL substring leak
#
# Same bug class as the previously-fixed GHSA-xf8p-v2cg-h7h5 (OpenRouter).
# _resolve_openrouter_runtime's custom-endpoint branch selects OLLAMA_API_KEY
# when the base_url "looks like" ollama.com. Previous implementation used
# raw substring match; a custom base_url whose PATH or look-alike host
# merely contained "ollama.com" leaked OLLAMA_API_KEY to that endpoint.
# Fix: use base_url_host_matches (same helper as the OpenRouter sweep).
# ---------------------------------------------------------------------------
class TestOllamaUrlSubstringLeak:
"""Call-site regression tests for the fix in _resolve_openrouter_runtime."""
def _make_cfg(self, base_url):
return {"base_url": base_url, "api_key": "", "provider": "custom"}
def test_ollama_key_not_leaked_to_path_injection(self, monkeypatch):
"""http://127.0.0.1:9000/ollama.com/v1 — attacker endpoint with
ollama.com in PATH. Must resolve to OPENAI_API_KEY, not OLLAMA_API_KEY."""
monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
monkeypatch.setenv("OPENROUTER_API_KEY", "or-secret")
monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak")
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
"http://127.0.0.1:9000/ollama.com/v1"
))
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
resolved = rp.resolve_runtime_provider(requested="custom")
assert "ol-SECRET" not in resolved["api_key"], (
"OLLAMA_API_KEY must not be sent to an endpoint whose "
"hostname is not ollama.com (GHSA-76xc-57q6-vm5m)"
)
assert resolved["api_key"] == "oa-secret"
def test_ollama_key_not_leaked_to_lookalike_host(self, monkeypatch):
"""ollama.com.attacker.test — look-alike host. OLLAMA_API_KEY
must not be sent."""
monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
monkeypatch.setenv("OLLAMA_API_KEY", "ol-SECRET-should-not-leak")
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
"http://ollama.com.attacker.test:9000/v1"
))
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
resolved = rp.resolve_runtime_provider(requested="custom")
assert "ol-SECRET" not in resolved["api_key"]
assert resolved["api_key"] == "oa-secret"
def test_ollama_key_sent_to_genuine_ollama_com(self, monkeypatch):
"""https://ollama.com/v1 — legit Ollama Cloud. OLLAMA_API_KEY
should be used."""
monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key")
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
"https://ollama.com/v1"
))
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
resolved = rp.resolve_runtime_provider(requested="custom")
assert resolved["api_key"] == "ol-legit-key"
def test_ollama_key_sent_to_ollama_subdomain(self, monkeypatch):
"""https://api.ollama.com/v1 — legit subdomain."""
monkeypatch.setenv("OPENAI_API_KEY", "oa-secret")
monkeypatch.setenv("OLLAMA_API_KEY", "ol-legit-key")
monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(
"https://api.ollama.com/v1"
))
monkeypatch.setattr(rp, "load_pool", lambda provider: None)
monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
resolved = rp.resolve_runtime_provider(requested="custom")
assert resolved["api_key"] == "ol-legit-key"

View file

@ -0,0 +1,148 @@
"""Tests for GHSA-ppp5-vxwm-4cf7 — Host-header validation.
DNS rebinding defence: a victim browser that has the dashboard open
could be tricked into fetching from an attacker-controlled hostname
that TTL-flips to 127.0.0.1. Same-origin / CORS checks won't help —
the browser now treats the attacker origin as same-origin. Validating
the Host header at the application layer rejects the attack.
"""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
_repo = str(Path(__file__).resolve().parents[1])
if _repo not in sys.path:
sys.path.insert(0, _repo)
class TestHostHeaderValidator:
"""Unit test the _is_accepted_host helper directly — cheaper and
more thorough than spinning up the full FastAPI app."""
def test_loopback_bind_accepts_loopback_names(self):
from hermes_cli.web_server import _is_accepted_host
for bound in ("127.0.0.1", "localhost", "::1"):
for host_header in (
"127.0.0.1", "127.0.0.1:9119",
"localhost", "localhost:9119",
"[::1]", "[::1]:9119",
):
assert _is_accepted_host(host_header, bound), (
f"bound={bound} must accept host={host_header}"
)
def test_loopback_bind_rejects_attacker_hostnames(self):
"""The core rebinding defence: attacker-controlled hosts that
TTL-flip to 127.0.0.1 must be rejected."""
from hermes_cli.web_server import _is_accepted_host
for bound in ("127.0.0.1", "localhost"):
for attacker in (
"evil.example",
"evil.example:9119",
"rebind.attacker.test:80",
"localhost.attacker.test", # subdomain trick
"127.0.0.1.evil.test", # lookalike IP prefix
"", # missing Host
):
assert not _is_accepted_host(attacker, bound), (
f"bound={bound} must reject attacker host={attacker!r}"
)
def test_zero_zero_bind_accepts_anything(self):
"""0.0.0.0 means operator explicitly opted into all-interfaces
(requires --insecure). No Host-layer defence is possible rely
on operator network controls."""
from hermes_cli.web_server import _is_accepted_host
for host in ("10.0.0.5", "evil.example", "my-server.corp.net"):
assert _is_accepted_host(host, "0.0.0.0")
assert _is_accepted_host(host + ":9119", "0.0.0.0")
def test_explicit_non_loopback_bind_requires_exact_match(self):
"""If the operator bound to a specific non-loopback hostname,
the Host header must match exactly."""
from hermes_cli.web_server import _is_accepted_host
assert _is_accepted_host("my-server.corp.net", "my-server.corp.net")
assert _is_accepted_host("my-server.corp.net:9119", "my-server.corp.net")
# Different host — reject
assert not _is_accepted_host("evil.example", "my-server.corp.net")
# Loopback — reject (we bound to a specific non-loopback name)
assert not _is_accepted_host("localhost", "my-server.corp.net")
def test_case_insensitive_comparison(self):
"""Host headers are case-insensitive per RFC — accept variations."""
from hermes_cli.web_server import _is_accepted_host
assert _is_accepted_host("LOCALHOST", "127.0.0.1")
assert _is_accepted_host("LocalHost:9119", "127.0.0.1")
class TestHostHeaderMiddleware:
"""End-to-end test via the FastAPI app — verify the middleware
rejects bad Host headers with 400."""
def test_rebinding_request_rejected(self):
from fastapi.testclient import TestClient
from hermes_cli.web_server import app
# Simulate start_server having set the bound_host
app.state.bound_host = "127.0.0.1"
try:
client = TestClient(app)
# The TestClient sends Host: testserver by default — which is
# NOT a loopback alias, so the middleware must reject it.
resp = client.get(
"/api/status",
headers={"Host": "evil.example"},
)
assert resp.status_code == 400
assert "Invalid Host header" in resp.json()["detail"]
finally:
# Clean up so other tests don't inherit the bound_host
if hasattr(app.state, "bound_host"):
del app.state.bound_host
def test_legit_loopback_request_accepted(self):
from fastapi.testclient import TestClient
from hermes_cli.web_server import app
app.state.bound_host = "127.0.0.1"
try:
client = TestClient(app)
# /api/status is in _PUBLIC_API_PATHS — passes auth — so the
# only thing that can reject is the host header middleware
resp = client.get(
"/api/status",
headers={"Host": "localhost:9119"},
)
# Either 200 (endpoint served) or some other non-400 —
# just not the host-rejection 400
assert resp.status_code != 400 or (
"Invalid Host header" not in resp.json().get("detail", "")
)
finally:
if hasattr(app.state, "bound_host"):
del app.state.bound_host
def test_no_bound_host_skips_validation(self):
"""If app.state.bound_host isn't set (e.g. running under test
infra without calling start_server), middleware must pass through
rather than crash."""
from fastapi.testclient import TestClient
from hermes_cli.web_server import app
# Make sure bound_host isn't set
if hasattr(app.state, "bound_host"):
del app.state.bound_host
client = TestClient(app)
resp = client.get("/api/status")
# Should get through to the status endpoint, not a 400
assert resp.status_code != 400

View file

@ -136,13 +136,15 @@ class TestXiaomiModelCatalog:
assert PROVIDER_TO_MODELS_DEV["xiaomi"] == "xiaomi"
def test_static_model_list_fallback(self):
"""Static _PROVIDER_MODELS fallback must exist for model picker."""
"""Static _PROVIDER_MODELS fallback must exist for model picker.
We only assert the provider key is present the specific model
names are data that changes with upstream releases and doesn't
belong in tests.
"""
from hermes_cli.models import _PROVIDER_MODELS
assert "xiaomi" in _PROVIDER_MODELS
models = _PROVIDER_MODELS["xiaomi"]
assert "mimo-v2-pro" in models
assert "mimo-v2-omni" in models
assert "mimo-v2-flash" in models
assert len(_PROVIDER_MODELS["xiaomi"]) >= 1
def test_list_agentic_models_mock(self, monkeypatch):
"""When models.dev returns Xiaomi data, list_agentic_models should return models."""

View file

@ -118,6 +118,86 @@ class TestOpenAIWireFormatOnCustomProvider:
assert agent._anthropic_prompt_cache_policy() == (False, False)
class TestQwenAlibabaFamily:
"""Qwen on OpenCode/OpenCode-Go/Alibaba — needs cache_control even on OpenAI-wire.
Upstream pi-mono #3392 / #3393 documented that these providers serve
zero cache hits without Anthropic-style markers. Regression reported
by community user (Qwen3.6 on opencode-go burning through
subscription with no cache). Envelope layout, not native, because the
wire format is OpenAI chat.completions.
"""
def test_qwen_on_opencode_go_caches_with_envelope_layout(self):
agent = _make_agent(
provider="opencode-go",
base_url="https://opencode.ai/v1",
api_mode="chat_completions",
model="qwen3.6-plus",
)
should, native = agent._anthropic_prompt_cache_policy()
assert should is True, "Qwen on opencode-go must cache"
assert native is False, "opencode-go is OpenAI-wire; envelope layout"
def test_qwen35_plus_on_opencode_go(self):
agent = _make_agent(
provider="opencode-go",
base_url="https://opencode.ai/v1",
api_mode="chat_completions",
model="qwen3.5-plus",
)
assert agent._anthropic_prompt_cache_policy() == (True, False)
def test_qwen_on_opencode_zen_caches(self):
agent = _make_agent(
provider="opencode",
base_url="https://opencode.ai/v1",
api_mode="chat_completions",
model="qwen3-coder-plus",
)
assert agent._anthropic_prompt_cache_policy() == (True, False)
def test_qwen_on_direct_alibaba_caches(self):
agent = _make_agent(
provider="alibaba",
base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
api_mode="chat_completions",
model="qwen3-coder",
)
assert agent._anthropic_prompt_cache_policy() == (True, False)
def test_non_qwen_on_opencode_go_does_not_cache(self):
# GLM / Kimi on opencode-go don't need markers (they have automatic
# server-side caching or none at all).
agent = _make_agent(
provider="opencode-go",
base_url="https://opencode.ai/v1",
api_mode="chat_completions",
model="glm-5",
)
assert agent._anthropic_prompt_cache_policy() == (False, False)
def test_kimi_on_opencode_go_does_not_cache(self):
agent = _make_agent(
provider="opencode-go",
base_url="https://opencode.ai/v1",
api_mode="chat_completions",
model="kimi-k2.5",
)
assert agent._anthropic_prompt_cache_policy() == (False, False)
def test_qwen_on_openrouter_not_affected(self):
# Qwen via OpenRouter falls through — OpenRouter has its own
# upstream caching arrangement for Qwen (provider-dependent).
agent = _make_agent(
provider="openrouter",
base_url="https://openrouter.ai/api/v1",
api_mode="chat_completions",
model="qwen/qwen3-coder",
)
assert agent._anthropic_prompt_cache_policy() == (False, False)
class TestExplicitOverrides:
"""Policy accepts keyword overrides for switch_model / fallback activation."""

View file

@ -67,6 +67,14 @@ def test_get_proxy_from_env_ignores_blank_values(monkeypatch):
assert _get_proxy_from_env() == "http://real-proxy:8080"
def test_get_proxy_from_env_normalizes_socks_alias(monkeypatch):
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
"https_proxy", "http_proxy", "all_proxy"):
monkeypatch.delenv(key, raising=False)
monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
assert _get_proxy_from_env() == "socks5://127.0.0.1:1080/"
@patch("run_agent.OpenAI")
def test_create_openai_client_routes_via_proxy_when_env_set(mock_openai, monkeypatch):
"""With HTTPS_PROXY set, the custom httpx.Client must mount an HTTPProxy pool.

View file

@ -33,6 +33,11 @@ class TestInterruptPropagationToChild(unittest.TestCase):
agent._active_children = []
agent._active_children_lock = threading.Lock()
agent.quiet_mode = True
# Provider/model/base_url are read by stale-timeout resolution paths;
# the specific values don't matter for interrupt tests.
agent.provider = "openrouter"
agent.model = "test/model"
agent._base_url = "http://localhost:1234"
return agent
def test_parent_interrupt_sets_child_flag(self):

View file

@ -952,6 +952,84 @@ class TestBuildApiKwargs:
assert "temperature" not in kwargs
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
"""Kimi endpoint should send max_tokens=32000 and reasoning_effort as
top-level params, matching Kimi CLI's default behavior."""
agent.base_url = "https://api.kimi.com/coding/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-for-coding"
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert kwargs["max_tokens"] == 32000
assert kwargs["reasoning_effort"] == "medium"
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
"""reasoning_effort should reflect reasoning_config.effort when set."""
agent.base_url = "https://api.kimi.com/coding/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-for-coding"
agent.reasoning_config = {"enabled": True, "effort": "high"}
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert kwargs["reasoning_effort"] == "high"
def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
"""Kimi endpoint should send extra_body.thinking={"type":"enabled"}
to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
agent.base_url = "https://api.kimi.com/coding/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-for-coding"
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_kimi_coding_endpoint_disables_thinking(self, agent):
"""When reasoning_config.enabled=False, thinking should be disabled
and reasoning_effort should be omitted entirely mirroring Kimi
CLI's with_thinking("off") which maps to reasoning_effort=None."""
agent.base_url = "https://api.kimi.com/coding/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-for-coding"
agent.reasoning_config = {"enabled": False}
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert kwargs["extra_body"]["thinking"] == {"type": "disabled"}
assert "reasoning_effort" not in kwargs
def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
"""api.moonshot.ai should get the same Kimi-compatible params."""
agent.base_url = "https://api.moonshot.ai/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-k2.5"
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert kwargs["max_tokens"] == 32000
assert kwargs["reasoning_effort"] == "medium"
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
"""api.moonshot.cn (China endpoint) should get the same params."""
agent.base_url = "https://api.moonshot.cn/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-k2.5"
messages = [{"role": "user", "content": "hi"}]
kwargs = agent._build_api_kwargs(messages)
assert kwargs["max_tokens"] == 32000
assert kwargs["reasoning_effort"] == "medium"
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_provider_preferences_injected(self, agent):
agent.base_url = "https://openrouter.ai/api/v1"
agent.providers_allowed = ["Anthropic"]

203
tests/test_account_usage.py Normal file
View file

@ -0,0 +1,203 @@
from datetime import datetime, timezone
from agent.account_usage import (
AccountUsageSnapshot,
AccountUsageWindow,
fetch_account_usage,
render_account_usage_lines,
)
class _Response:
def __init__(self, payload, status_code=200):
self._payload = payload
self.status_code = status_code
def raise_for_status(self):
if self.status_code >= 400:
raise RuntimeError(f"HTTP {self.status_code}")
def json(self):
return self._payload
class _Client:
def __init__(self, payload):
self._payload = payload
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def get(self, url, headers=None):
return _Response(self._payload)
class _RoutingClient:
def __init__(self, payloads):
self._payloads = payloads
def __enter__(self):
return self
def __exit__(self, exc_type, exc, tb):
return False
def get(self, url, headers=None):
return _Response(self._payloads[url])
def test_fetch_account_usage_codex(monkeypatch):
monkeypatch.setattr(
"agent.account_usage.resolve_codex_runtime_credentials",
lambda refresh_if_expiring=True: {
"provider": "openai-codex",
"base_url": "https://chatgpt.com/backend-api/codex",
"api_key": "access-token",
},
)
monkeypatch.setattr(
"agent.account_usage._read_codex_tokens",
lambda: {"tokens": {"account_id": "acct_123"}},
)
monkeypatch.setattr(
"agent.account_usage.httpx.Client",
lambda timeout=15.0: _Client(
{
"plan_type": "pro",
"rate_limit": {
"primary_window": {
"used_percent": 15,
"reset_at": 1_900_000_000,
"limit_window_seconds": 18000,
},
"secondary_window": {
"used_percent": 40,
"reset_at": 1_900_500_000,
"limit_window_seconds": 604800,
},
},
"credits": {"has_credits": True, "balance": 12.5},
}
),
)
snapshot = fetch_account_usage("openai-codex")
assert snapshot is not None
assert snapshot.plan == "Pro"
assert len(snapshot.windows) == 2
assert snapshot.windows[0].label == "Session"
assert snapshot.windows[0].used_percent == 15.0
assert snapshot.windows[0].reset_at == datetime.fromtimestamp(1_900_000_000, tz=timezone.utc)
assert "Credits balance: $12.50" in snapshot.details
def test_render_account_usage_lines_includes_reset_and_provider():
snapshot = AccountUsageSnapshot(
provider="openai-codex",
source="usage_api",
fetched_at=datetime.now(timezone.utc),
plan="Pro",
windows=(
AccountUsageWindow(
label="Session",
used_percent=25,
reset_at=datetime.now(timezone.utc),
),
),
details=("Credits balance: $9.99",),
)
lines = render_account_usage_lines(snapshot)
assert lines[0] == "📈 Account limits"
assert "openai-codex (Pro)" in lines[1]
assert "Session: 75% remaining (25% used)" in lines[2]
assert "Credits balance: $9.99" in lines[3]
def test_fetch_account_usage_openrouter_uses_limit_remaining_and_ignores_deprecated_rate_limit(monkeypatch):
monkeypatch.setattr(
"agent.account_usage.resolve_runtime_provider",
lambda requested, explicit_base_url=None, explicit_api_key=None: {
"provider": "openrouter",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "sk-test",
},
)
monkeypatch.setattr(
"agent.account_usage.httpx.Client",
lambda timeout=10.0: _RoutingClient(
{
"https://openrouter.ai/api/v1/credits": {
"data": {"total_credits": 300.0, "total_usage": 10.92}
},
"https://openrouter.ai/api/v1/key": {
"data": {
"limit": 100.0,
"limit_remaining": 70.0,
"limit_reset": "monthly",
"usage": 12.5,
"usage_daily": 0.5,
"usage_weekly": 2.0,
"usage_monthly": 8.0,
"rate_limit": {"requests": -1, "interval": "10s"},
}
},
}
),
)
snapshot = fetch_account_usage("openrouter")
assert snapshot is not None
assert snapshot.windows == (
AccountUsageWindow(
label="API key quota",
used_percent=30.0,
detail="$70.00 of $100.00 remaining • resets monthly",
),
)
assert "Credits balance: $289.08" in snapshot.details
assert "API key usage: $12.50 total • $0.50 today • $2.00 this week • $8.00 this month" in snapshot.details
assert all("-1 requests / 10s" not in line for line in render_account_usage_lines(snapshot))
def test_fetch_account_usage_openrouter_omits_quota_window_when_key_has_no_limit(monkeypatch):
monkeypatch.setattr(
"agent.account_usage.resolve_runtime_provider",
lambda requested, explicit_base_url=None, explicit_api_key=None: {
"provider": "openrouter",
"base_url": "https://openrouter.ai/api/v1",
"api_key": "sk-test",
},
)
monkeypatch.setattr(
"agent.account_usage.httpx.Client",
lambda timeout=10.0: _RoutingClient(
{
"https://openrouter.ai/api/v1/credits": {
"data": {"total_credits": 100.0, "total_usage": 25.5}
},
"https://openrouter.ai/api/v1/key": {
"data": {
"limit": None,
"limit_remaining": None,
"usage": 25.5,
"usage_daily": 1.25,
"usage_weekly": 4.5,
"usage_monthly": 18.0,
}
},
}
),
)
snapshot = fetch_account_usage("openrouter")
assert snapshot is not None
assert snapshot.windows == ()
assert "Credits balance: $74.50" in snapshot.details
assert "API key usage: $25.50 total • $1.25 today • $4.50 this week • $18.00 this month" in snapshot.details

View file

@ -106,3 +106,55 @@ class TestBaseUrlHostMatchesEdgeCases:
def test_trailing_dot_on_domain_stripped(self):
assert base_url_host_matches("https://openrouter.ai/v1", "openrouter.ai.") is True
class TestOllamaUrlHostCheck:
"""GHSA-76xc-57q6-vm5m — ollama.com was using a raw substring match for
credential selection (same bug class as GHSA-xf8p-v2cg-h7h5 for OpenRouter).
These tests lock in that the base_url_host_matches fix correctly rejects
the same attack vectors for Ollama.
"""
def test_ollama_com_path_injection_rejected(self):
"""http://evil.test/ollama.com/v1 — ollama.com appears in the path,
not the host. Must not be treated as Ollama Cloud."""
assert base_url_host_matches(
"http://127.0.0.1:9000/ollama.com/v1", "ollama.com"
) is False
def test_ollama_com_subdomain_lookalike_rejected(self):
"""ollama.com.attacker.test is a separate host, not ollama.com."""
assert base_url_host_matches(
"http://ollama.com.attacker.test:9000/v1", "ollama.com"
) is False
def test_ollama_com_localtest_me_rejected(self):
"""ollama.com.localtest.me resolves to 127.0.0.1 via localtest.me
but its true hostname is localtest.me, not ollama.com."""
assert base_url_host_matches(
"http://ollama.com.localtest.me:9000/v1", "ollama.com"
) is False
def test_ollama_ai_is_not_ollama_com(self):
"""Different TLD. ollama.ai is not ollama.com."""
assert base_url_host_matches(
"https://ollama.ai/v1", "ollama.com"
) is False
def test_localhost_ollama_port_is_not_ollama_com(self):
"""http://localhost:11434/v1 is a local Ollama install, but its
hostname is localhost, so OLLAMA_API_KEY (an ollama.com-only secret)
must not be sent."""
assert base_url_host_matches(
"http://localhost:11434/v1", "ollama.com"
) is False
def test_genuine_ollama_com_matches(self):
assert base_url_host_matches(
"https://ollama.com/api/generate", "ollama.com"
) is True
def test_ollama_com_subdomain_matches(self):
assert base_url_host_matches(
"https://api.ollama.com/v1", "ollama.com"
) is True

View file

@ -161,6 +161,8 @@ def test_transform_tool_result_runs_after_post_tool_call(monkeypatch):
def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_path):
"""End-to-end: load a real plugin from HERMES_HOME and verify it rewrites results."""
import yaml
hermes_home = Path(os.environ["HERMES_HOME"])
plugins_dir = hermes_home / "plugins"
plugin_dir = plugins_dir / "transform_result_canon"
@ -172,7 +174,15 @@ def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_pat
'lambda **kw: f\'CANON[{kw["tool_name"]}]\' + kw["result"])\n',
encoding="utf-8",
)
# Plugins are opt-in — must be listed in plugins.enabled to load.
cfg_path = hermes_home / "config.yaml"
cfg_path.write_text(
yaml.safe_dump({"plugins": {"enabled": ["transform_result_canon"]}}),
encoding="utf-8",
)
# Force a fresh plugin manager so the new config is picked up.
plugins_mod._plugin_manager = plugins_mod.PluginManager()
plugins_mod.discover_plugins()
out = _run_handle_function_call(

View file

@ -58,10 +58,3 @@ class TestCamofoxConfigDefaults:
browser_cfg = DEFAULT_CONFIG["browser"]
assert browser_cfg["camofox"]["managed_persistence"] is False
def test_config_version_matches_current_schema(self):
from hermes_cli.config import DEFAULT_CONFIG
# The current schema version is tracked globally; unrelated default
# options may bump it after browser defaults are added.
assert DEFAULT_CONFIG["_config_version"] == 20

View file

@ -172,28 +172,60 @@ class TestTerminalIntegration:
assert blocked_var not in result
assert "PATH" in result
def test_passthrough_allows_blocklisted_var(self):
from tools.environments.local import _sanitize_subprocess_env, _HERMES_PROVIDER_ENV_BLOCKLIST
def test_passthrough_cannot_override_provider_blocklist(self):
"""GHSA-rhgp-j443-p4rf: register_env_passthrough must NOT accept
Hermes provider credentials that was the bypass where a skill
could declare ANTHROPIC_TOKEN / OPENAI_API_KEY as passthrough and
defeat the execute_code sandbox scrubbing."""
from tools.environments.local import (
_sanitize_subprocess_env,
_HERMES_PROVIDER_ENV_BLOCKLIST,
)
blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
# Attempt to register — must be silently refused (logged warning).
register_env_passthrough([blocked_var])
# is_env_passthrough must NOT report it as allowed
assert not is_env_passthrough(blocked_var)
# Sanitizer still strips the var from subprocess env
env = {blocked_var: "secret_value", "PATH": "/usr/bin"}
result = _sanitize_subprocess_env(env)
assert blocked_var in result
assert result[blocked_var] == "secret_value"
assert blocked_var not in result
assert "PATH" in result
def test_make_run_env_passthrough(self, monkeypatch):
from tools.environments.local import _make_run_env, _HERMES_PROVIDER_ENV_BLOCKLIST
def test_make_run_env_blocklist_override_rejected(self):
"""_make_run_env must NOT expose a blocklisted var to subprocess env
even after a skill attempts to register it via passthrough."""
import os
from tools.environments.local import (
_make_run_env,
_HERMES_PROVIDER_ENV_BLOCKLIST,
)
blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
monkeypatch.setenv(blocked_var, "secret_value")
os.environ[blocked_var] = "secret_value"
try:
# Without passthrough — blocked
result_before = _make_run_env({})
assert blocked_var not in result_before
# Without passthrough — blocked
result_before = _make_run_env({})
assert blocked_var not in result_before
# Skill tries to register it — must be refused, so still blocked
register_env_passthrough([blocked_var])
result_after = _make_run_env({})
assert blocked_var not in result_after
finally:
os.environ.pop(blocked_var, None)
# With passthrough — allowed
register_env_passthrough([blocked_var])
result_after = _make_run_env({})
assert blocked_var in result_after
def test_non_hermes_api_key_still_registerable(self):
"""Third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT
Hermes provider credentials and must still pass through skills
that legitimately wrap third-party APIs must keep working."""
# TENOR_API_KEY is a real example — used by the gif-search skill
register_env_passthrough(["TENOR_API_KEY"])
assert is_env_passthrough("TENOR_API_KEY")
# Arbitrary skill-specific var
register_env_passthrough(["MY_SKILL_CUSTOM_CONFIG"])
assert is_env_passthrough("MY_SKILL_CUSTOM_CONFIG")

View file

@ -230,3 +230,102 @@ class TestEscapeDriftGuard:
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
assert err is None
assert count == 1
class TestFindClosestLines:
def setup_method(self):
from tools.fuzzy_match import find_closest_lines
self.find_closest_lines = find_closest_lines
def test_finds_similar_line(self):
content = "def foo():\n pass\ndef bar():\n return 1\n"
result = self.find_closest_lines("def baz():", content)
assert "def foo" in result or "def bar" in result
def test_returns_empty_for_no_match(self):
content = "completely different content here"
result = self.find_closest_lines("xyzzy_no_match_possible_!!!", content)
assert result == ""
def test_returns_empty_for_empty_inputs(self):
assert self.find_closest_lines("", "some content") == ""
assert self.find_closest_lines("old string", "") == ""
def test_includes_context_lines(self):
content = "line1\nline2\ndef target():\n pass\nline5\n"
result = self.find_closest_lines("def target():", content)
assert "target" in result
def test_includes_line_numbers(self):
content = "line1\nline2\ndef foo():\n pass\n"
result = self.find_closest_lines("def foo():", content)
# Should include line numbers in format "N| content"
assert "|" in result
class TestFormatNoMatchHint:
"""Gating tests for format_no_match_hint — the shared helper that decides
whether a 'Did you mean?' snippet should be appended to an error.
"""
def setup_method(self):
from tools.fuzzy_match import format_no_match_hint
self.fmt = format_no_match_hint
def test_fires_on_could_not_find_with_match(self):
"""Classic no-match: similar content exists → hint fires."""
content = "def foo():\n pass\ndef bar():\n pass\n"
result = self.fmt(
"Could not find a match for old_string in the file",
0, "def baz():", content,
)
assert "Did you mean" in result
assert "foo" in result or "bar" in result
def test_silent_on_ambiguous_match_error(self):
"""'Found N matches' is not a missing-match failure — no hint."""
content = "aaa bbb aaa\n"
result = self.fmt(
"Found 2 matches for old_string. Provide more context to make it unique, or use replace_all=True.",
0, "aaa", content,
)
assert result == ""
def test_silent_on_escape_drift_error(self):
"""Escape-drift errors are intentional blocks — hint would mislead."""
content = "x = 1\n"
result = self.fmt(
"Escape-drift detected: old_string and new_string contain the literal sequence '\\\\''...",
0, "x = \\'1\\'", content,
)
assert result == ""
def test_silent_on_identical_strings(self):
"""old_string == new_string — hint irrelevant."""
result = self.fmt(
"old_string and new_string are identical",
0, "foo", "foo bar\n",
)
assert result == ""
def test_silent_when_match_count_nonzero(self):
"""If match succeeded, we shouldn't be in the error path — defense in depth."""
result = self.fmt(
"Could not find a match for old_string in the file",
1, "foo", "foo bar\n",
)
assert result == ""
def test_silent_on_none_error(self):
"""No error at all — no hint."""
result = self.fmt(None, 0, "foo", "bar\n")
assert result == ""
def test_silent_when_no_similar_content(self):
"""Even for a valid no-match error, skip hint when nothing similar exists."""
result = self.fmt(
"Could not find a match for old_string in the file",
0, "totally_unique_xyzzy_qux", "abc\nxyz\n",
)
assert result == ""

View file

@ -0,0 +1,39 @@
"""FAL_KEY env var normalization (whitespace-only treated as unset)."""
def test_fal_key_whitespace_is_unset(monkeypatch):
# Whitespace-only FAL_KEY must NOT register as configured, and the managed
# gateway fallback must be disabled for this assertion to be meaningful.
monkeypatch.setenv("FAL_KEY", " ")
from tools import image_generation_tool
monkeypatch.setattr(
image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
)
assert image_generation_tool.check_fal_api_key() is False
def test_fal_key_valid(monkeypatch):
monkeypatch.setenv("FAL_KEY", "sk-test")
from tools import image_generation_tool
monkeypatch.setattr(
image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
)
assert image_generation_tool.check_fal_api_key() is True
def test_fal_key_empty_is_unset(monkeypatch):
monkeypatch.setenv("FAL_KEY", "")
from tools import image_generation_tool
monkeypatch.setattr(
image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
)
assert image_generation_tool.check_fal_api_key() is False

View file

@ -0,0 +1,162 @@
"""Tests for terminal.shell_init_files / terminal.auto_source_bashrc.
A bash ``-l -c`` invocation does NOT source ``~/.bashrc``, so tools that
register themselves there (nvm, asdf, pyenv) stay invisible to the
environment snapshot built by ``LocalEnvironment.init_session``. These
tests verify the config-driven prelude that fixes that.
"""
import os
from unittest.mock import patch
import pytest
from tools.environments.local import (
LocalEnvironment,
_prepend_shell_init,
_read_terminal_shell_init_config,
_resolve_shell_init_files,
)
class TestResolveShellInitFiles:
def test_auto_sources_bashrc_when_present(self, tmp_path, monkeypatch):
bashrc = tmp_path / ".bashrc"
bashrc.write_text('export MARKER=seen\n')
monkeypatch.setenv("HOME", str(tmp_path))
# Default config: auto_source_bashrc on, no explicit list.
with patch(
"tools.environments.local._read_terminal_shell_init_config",
return_value=([], True),
):
resolved = _resolve_shell_init_files()
assert resolved == [str(bashrc)]
def test_skips_bashrc_when_missing(self, tmp_path, monkeypatch):
# No bashrc written.
monkeypatch.setenv("HOME", str(tmp_path))
with patch(
"tools.environments.local._read_terminal_shell_init_config",
return_value=([], True),
):
resolved = _resolve_shell_init_files()
assert resolved == []
def test_auto_source_bashrc_off_suppresses_default(self, tmp_path, monkeypatch):
bashrc = tmp_path / ".bashrc"
bashrc.write_text('export MARKER=seen\n')
monkeypatch.setenv("HOME", str(tmp_path))
with patch(
"tools.environments.local._read_terminal_shell_init_config",
return_value=([], False),
):
resolved = _resolve_shell_init_files()
assert resolved == []
def test_explicit_list_wins_over_auto(self, tmp_path, monkeypatch):
bashrc = tmp_path / ".bashrc"
bashrc.write_text('export FROM_BASHRC=1\n')
custom = tmp_path / "custom.sh"
custom.write_text('export FROM_CUSTOM=1\n')
monkeypatch.setenv("HOME", str(tmp_path))
# auto_source_bashrc stays True but the explicit list takes precedence.
with patch(
"tools.environments.local._read_terminal_shell_init_config",
return_value=([str(custom)], True),
):
resolved = _resolve_shell_init_files()
assert resolved == [str(custom)]
assert str(bashrc) not in resolved
def test_expands_home_and_env_vars(self, tmp_path, monkeypatch):
target = tmp_path / "rc" / "custom.sh"
target.parent.mkdir()
target.write_text('export A=1\n')
monkeypatch.setenv("HOME", str(tmp_path))
monkeypatch.setenv("CUSTOM_RC_DIR", str(tmp_path / "rc"))
with patch(
"tools.environments.local._read_terminal_shell_init_config",
return_value=(["~/rc/custom.sh"], False),
):
resolved_home = _resolve_shell_init_files()
with patch(
"tools.environments.local._read_terminal_shell_init_config",
return_value=(["${CUSTOM_RC_DIR}/custom.sh"], False),
):
resolved_var = _resolve_shell_init_files()
assert resolved_home == [str(target)]
assert resolved_var == [str(target)]
def test_missing_explicit_files_are_skipped_silently(self, tmp_path, monkeypatch):
monkeypatch.setenv("HOME", str(tmp_path))
with patch(
"tools.environments.local._read_terminal_shell_init_config",
return_value=([str(tmp_path / "does-not-exist.sh")], False),
):
resolved = _resolve_shell_init_files()
assert resolved == []
class TestPrependShellInit:
def test_empty_list_returns_command_unchanged(self):
assert _prepend_shell_init("echo hi", []) == "echo hi"
def test_prepends_guarded_source_lines(self):
wrapped = _prepend_shell_init("echo hi", ["/tmp/a.sh", "/tmp/b.sh"])
assert "echo hi" in wrapped
# Each file is sourced through a guarded [ -r … ] && . '…' || true
# pattern so a missing/broken rc can't abort the bootstrap.
assert "/tmp/a.sh" in wrapped
assert "/tmp/b.sh" in wrapped
assert "|| true" in wrapped
assert "set +e" in wrapped
def test_escapes_single_quotes(self):
wrapped = _prepend_shell_init("echo hi", ["/tmp/o'malley.sh"])
# The path must survive as the shell receives it; embedded single
# quote is escaped as '\'' rather than breaking the outer quoting.
assert "o'\\''malley" in wrapped
@pytest.mark.skipif(
os.environ.get("CI") == "true" and not os.path.isfile("/bin/bash"),
reason="Requires bash; CI sandbox may strip it.",
)
class TestSnapshotEndToEnd:
"""Spin up a real LocalEnvironment and confirm the snapshot sources
extra init files."""
def test_snapshot_picks_up_init_file_exports(self, tmp_path, monkeypatch):
init_file = tmp_path / "custom-init.sh"
init_file.write_text(
'export HERMES_SHELL_INIT_PROBE="probe-ok"\n'
'export PATH="/opt/shell-init-probe/bin:$PATH"\n'
)
with patch(
"tools.environments.local._read_terminal_shell_init_config",
return_value=([str(init_file)], False),
):
env = LocalEnvironment(cwd=str(tmp_path), timeout=15)
try:
result = env.execute(
'echo "PROBE=$HERMES_SHELL_INIT_PROBE"; echo "PATH=$PATH"'
)
finally:
env.cleanup()
output = result.get("output", "")
assert "PROBE=probe-ok" in output
assert "/opt/shell-init-probe/bin" in output

View file

@ -0,0 +1,252 @@
"""Tests for MCP tool-handler circuit-breaker recovery.
The circuit breaker in ``tools/mcp_tool.py`` is intended to short-circuit
calls to an MCP server that has failed ``_CIRCUIT_BREAKER_THRESHOLD``
consecutive times, then *transition back to a usable state* once the
server has had time to recover (or an explicit reconnect succeeds).
The original implementation only had two states closed and open with
no mechanism to transition back to closed, so a tripped breaker stayed
tripped for the lifetime of the process. These tests lock in the
half-open / cooldown / reconnect-resets-breaker behavior that fixes
that.
"""
import json
from unittest.mock import MagicMock
import pytest
pytest.importorskip("mcp.client.auth.oauth2")
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
def _install_stub_server(mcp_tool_module, name: str, call_tool_impl):
"""Install a fake MCP server in the module's registry.
``call_tool_impl`` is an async function stored at ``session.call_tool``
(it's what the tool handler invokes).
"""
server = MagicMock()
server.name = name
session = MagicMock()
session.call_tool = call_tool_impl
server.session = session
server._reconnect_event = MagicMock()
server._ready = MagicMock()
server._ready.is_set.return_value = True
mcp_tool_module._servers[name] = server
mcp_tool_module._server_error_counts.pop(name, None)
if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
mcp_tool_module._server_breaker_opened_at.pop(name, None)
return server
def _cleanup(mcp_tool_module, name: str) -> None:
mcp_tool_module._servers.pop(name, None)
mcp_tool_module._server_error_counts.pop(name, None)
if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
mcp_tool_module._server_breaker_opened_at.pop(name, None)
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
def test_circuit_breaker_half_opens_after_cooldown(monkeypatch, tmp_path):
"""After a tripped breaker's cooldown elapses, the *next* call must
actually execute against the session (half-open probe). When the
probe succeeds, the breaker resets to fully closed.
"""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
from tools import mcp_tool
from tools.mcp_tool import _make_tool_handler
call_count = {"n": 0}
async def _call_tool_success(*a, **kw):
call_count["n"] += 1
result = MagicMock()
result.isError = False
block = MagicMock()
block.text = "ok"
result.content = [block]
result.structuredContent = None
return result
_install_stub_server(mcp_tool, "srv", _call_tool_success)
mcp_tool._ensure_mcp_loop()
try:
# Trip the breaker by setting the count at/above threshold and
# stamping the open-time to "now".
mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
fake_now = [1000.0]
def _fake_monotonic():
return fake_now[0]
monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic)
# The breaker-open timestamp dict is introduced by the fix; on
# a pre-fix build it won't exist, which will cause the test to
# fail at the .get() inside the gate (correct — the fix is
# required for this state to be tracked at all).
if hasattr(mcp_tool, "_server_breaker_opened_at"):
mcp_tool._server_breaker_opened_at["srv"] = fake_now[0]
cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)
handler = _make_tool_handler("srv", "tool1", 10.0)
# Before cooldown: must short-circuit (no session call).
result = handler({})
parsed = json.loads(result)
assert "error" in parsed, parsed
assert "unreachable" in parsed["error"].lower()
assert call_count["n"] == 0, (
"breaker should short-circuit before cooldown elapses"
)
# Advance past cooldown → next call is a half-open probe that
# actually hits the session.
fake_now[0] += cooldown + 1.0
result = handler({})
parsed = json.loads(result)
assert parsed.get("result") == "ok", parsed
assert call_count["n"] == 1, "half-open probe should invoke session"
# On probe success the breaker must close (count reset to 0).
assert mcp_tool._server_error_counts.get("srv", 0) == 0
finally:
_cleanup(mcp_tool, "srv")
def test_circuit_breaker_reopens_on_probe_failure(monkeypatch, tmp_path):
"""If the half-open probe fails, the breaker must re-arm the
cooldown (not let every subsequent call through).
"""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
from tools import mcp_tool
from tools.mcp_tool import _make_tool_handler
call_count = {"n": 0}
async def _call_tool_fails(*a, **kw):
call_count["n"] += 1
raise RuntimeError("still broken")
_install_stub_server(mcp_tool, "srv", _call_tool_fails)
mcp_tool._ensure_mcp_loop()
try:
mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
fake_now = [1000.0]
def _fake_monotonic():
return fake_now[0]
monkeypatch.setattr(mcp_tool.time, "monotonic", _fake_monotonic)
if hasattr(mcp_tool, "_server_breaker_opened_at"):
mcp_tool._server_breaker_opened_at["srv"] = fake_now[0]
cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)
handler = _make_tool_handler("srv", "tool1", 10.0)
# Advance past cooldown, run probe, expect failure.
fake_now[0] += cooldown + 1.0
result = handler({})
parsed = json.loads(result)
assert "error" in parsed
assert call_count["n"] == 1, "probe should invoke session once"
# The probe failure must have re-armed the cooldown — another
# immediate call should short-circuit, not invoke session again.
result = handler({})
parsed = json.loads(result)
assert "unreachable" in parsed.get("error", "").lower()
assert call_count["n"] == 1, (
"breaker should re-open and block further calls after probe failure"
)
finally:
_cleanup(mcp_tool, "srv")
def test_circuit_breaker_cleared_on_reconnect(monkeypatch, tmp_path):
"""When the auth-recovery path successfully reconnects the server,
the breaker should be cleared so subsequent calls aren't gated on a
stale failure count even if the post-reconnect retry itself fails.
This locks in the fix-#2 contract: a successful reconnect is
sufficient evidence that the server is viable again. Under the old
implementation, reset only happened on retry *success*, so a
reconnect+retry-failure left the counter pinned above threshold
forever.
"""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
from tools import mcp_tool
from tools.mcp_oauth_manager import get_manager, reset_manager_for_tests
from mcp.client.auth import OAuthFlowError
reset_manager_for_tests()
async def _call_tool_unused(*a, **kw): # pragma: no cover
raise AssertionError("session.call_tool should not be reached in this test")
_install_stub_server(mcp_tool, "srv", _call_tool_unused)
mcp_tool._ensure_mcp_loop()
# Open the breaker well above threshold, with a recent open-time so
# it would short-circuit everything without a reset.
mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + 2
if hasattr(mcp_tool, "_server_breaker_opened_at"):
import time as _time
mcp_tool._server_breaker_opened_at["srv"] = _time.monotonic()
# Force handle_401 to claim recovery succeeded.
mgr = get_manager()
async def _h401(name, token=None):
return True
monkeypatch.setattr(mgr, "handle_401", _h401)
try:
# Retry fails *after* the successful reconnect. Under the old
# implementation this bumps an already-tripped counter even
# higher. Under fix #2 the reset happens on successful
# reconnect, and the post-retry bump only raises the fresh
# count to 1 — still below threshold.
def _retry_call():
raise OAuthFlowError("still failing post-reconnect")
result = mcp_tool._handle_auth_error_and_retry(
"srv",
OAuthFlowError("initial"),
_retry_call,
"tools/call test",
)
# The call as a whole still surfaces needs_reauth because the
# retry itself didn't succeed, but the breaker state must
# reflect the successful reconnect.
assert result is not None
parsed = json.loads(result)
assert parsed.get("needs_reauth") is True, parsed
# Post-reconnect count was reset to 0, then the failing retry
# bumped it to exactly 1 — well below threshold.
count = mcp_tool._server_error_counts.get("srv", 0)
assert count < mcp_tool._CIRCUIT_BREAKER_THRESHOLD, (
f"successful reconnect must reset the breaker below threshold; "
f"got count={count}, threshold={mcp_tool._CIRCUIT_BREAKER_THRESHOLD}"
)
finally:
_cleanup(mcp_tool, "srv")

View file

@ -173,6 +173,8 @@ def test_terminal_output_transform_does_not_change_approval_or_exit_code_meaning
def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp_path):
import yaml
hermes_home = Path(os.environ["HERMES_HOME"])
plugins_dir = hermes_home / "plugins"
plugin_dir = plugins_dir / "terminal_transform"
@ -184,7 +186,15 @@ def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp
'lambda **kw: "PLUGIN-HEAD\\n" + kw["output"] + "\\nPLUGIN-TAIL")\n',
encoding="utf-8",
)
# Plugins are opt-in — must be listed in plugins.enabled to load.
cfg_path = hermes_home / "config.yaml"
cfg_path.write_text(
yaml.safe_dump({"plugins": {"enabled": ["terminal_transform"]}}),
encoding="utf-8",
)
# Force a fresh plugin manager so the new config is picked up.
plugins_mod._plugin_manager = plugins_mod.PluginManager()
plugins_mod.discover_plugins()
long_output = "X" * 60000

View file

@ -0,0 +1,198 @@
"""Tests for the KittenTTS local provider in tools/tts_tool.py."""
import json
from unittest.mock import MagicMock, patch
import numpy as np
import pytest
@pytest.fixture(autouse=True)
def clean_env(monkeypatch):
for key in ("HERMES_SESSION_PLATFORM",):
monkeypatch.delenv(key, raising=False)
@pytest.fixture(autouse=True)
def clear_kittentts_cache():
"""Reset the module-level model cache between tests."""
from tools import tts_tool as _tt
_tt._kittentts_model_cache.clear()
yield
_tt._kittentts_model_cache.clear()
@pytest.fixture
def mock_kittentts_module():
"""Inject a fake kittentts + soundfile module that return stub objects."""
fake_model = MagicMock()
# 24kHz float32 PCM at ~2s of silence
fake_model.generate.return_value = np.zeros(48000, dtype=np.float32)
fake_cls = MagicMock(return_value=fake_model)
fake_kittentts = MagicMock()
fake_kittentts.KittenTTS = fake_cls
# Stub soundfile — the real package isn't installed in CI venv, and
# _generate_kittentts does `import soundfile as sf` at runtime.
fake_sf = MagicMock()
def _fake_write(path, audio, samplerate):
# Emulate writing a real file so downstream path checks succeed.
import pathlib
pathlib.Path(path).write_bytes(b"RIFF\x00\x00\x00\x00WAVEfmt fake")
fake_sf.write = _fake_write
with patch.dict(
"sys.modules",
{"kittentts": fake_kittentts, "soundfile": fake_sf},
):
yield fake_model, fake_cls
class TestGenerateKittenTts:
def test_successful_wav_generation(self, tmp_path, mock_kittentts_module):
from tools.tts_tool import _generate_kittentts
fake_model, fake_cls = mock_kittentts_module
output_path = str(tmp_path / "test.wav")
result = _generate_kittentts("Hello world", output_path, {})
assert result == output_path
assert (tmp_path / "test.wav").exists()
fake_cls.assert_called_once()
fake_model.generate.assert_called_once()
def test_config_passes_voice_speed_cleantext(self, tmp_path, mock_kittentts_module):
from tools.tts_tool import _generate_kittentts
fake_model, _ = mock_kittentts_module
config = {
"kittentts": {
"model": "KittenML/kitten-tts-mini-0.8",
"voice": "Luna",
"speed": 1.25,
"clean_text": False,
}
}
_generate_kittentts("Hi there", str(tmp_path / "out.wav"), config)
call_kwargs = fake_model.generate.call_args.kwargs
assert call_kwargs["voice"] == "Luna"
assert call_kwargs["speed"] == 1.25
assert call_kwargs["clean_text"] is False
def test_default_model_and_voice(self, tmp_path, mock_kittentts_module):
from tools.tts_tool import (
DEFAULT_KITTENTTS_MODEL,
DEFAULT_KITTENTTS_VOICE,
_generate_kittentts,
)
fake_model, fake_cls = mock_kittentts_module
_generate_kittentts("Hi", str(tmp_path / "out.wav"), {})
fake_cls.assert_called_once_with(DEFAULT_KITTENTTS_MODEL)
assert fake_model.generate.call_args.kwargs["voice"] == DEFAULT_KITTENTTS_VOICE
def test_model_is_cached_across_calls(self, tmp_path, mock_kittentts_module):
from tools.tts_tool import _generate_kittentts
_, fake_cls = mock_kittentts_module
_generate_kittentts("One", str(tmp_path / "a.wav"), {})
_generate_kittentts("Two", str(tmp_path / "b.wav"), {})
# Same model name → class instantiated exactly once
assert fake_cls.call_count == 1
def test_different_models_are_cached_separately(self, tmp_path, mock_kittentts_module):
from tools.tts_tool import _generate_kittentts
_, fake_cls = mock_kittentts_module
_generate_kittentts(
"A", str(tmp_path / "a.wav"),
{"kittentts": {"model": "KittenML/kitten-tts-nano-0.8-int8"}},
)
_generate_kittentts(
"B", str(tmp_path / "b.wav"),
{"kittentts": {"model": "KittenML/kitten-tts-mini-0.8"}},
)
assert fake_cls.call_count == 2
def test_non_wav_extension_triggers_ffmpeg_conversion(
self, tmp_path, mock_kittentts_module, monkeypatch
):
"""Non-.wav output path causes WAV → target ffmpeg conversion."""
from tools import tts_tool as _tt
calls = []
def fake_shutil_which(cmd):
return "/usr/bin/ffmpeg" if cmd == "ffmpeg" else None
def fake_run(cmd, check=False, timeout=None, **kw):
calls.append(cmd)
# Emulate ffmpeg writing the output file
import pathlib
out_path = cmd[-1]
pathlib.Path(out_path).write_bytes(b"fake-mp3-data")
return MagicMock(returncode=0)
monkeypatch.setattr(_tt.shutil, "which", fake_shutil_which)
monkeypatch.setattr(_tt.subprocess, "run", fake_run)
output_path = str(tmp_path / "test.mp3")
result = _tt._generate_kittentts("Hi", output_path, {})
assert result == output_path
assert len(calls) == 1
assert calls[0][0] == "/usr/bin/ffmpeg"
def test_missing_kittentts_raises_import_error(self, tmp_path, monkeypatch):
"""When kittentts package is not installed, _import_kittentts raises."""
import sys
monkeypatch.setitem(sys.modules, "kittentts", None)
from tools.tts_tool import _generate_kittentts
with pytest.raises((ImportError, TypeError)):
_generate_kittentts("Hi", str(tmp_path / "out.wav"), {})
class TestCheckKittenttsAvailable:
def test_reports_available_when_package_present(self, monkeypatch):
import importlib.util
from tools.tts_tool import _check_kittentts_available
fake_spec = MagicMock()
monkeypatch.setattr(
importlib.util, "find_spec",
lambda name: fake_spec if name == "kittentts" else None,
)
assert _check_kittentts_available() is True
def test_reports_unavailable_when_package_missing(self, monkeypatch):
import importlib.util
from tools.tts_tool import _check_kittentts_available
monkeypatch.setattr(importlib.util, "find_spec", lambda name: None)
assert _check_kittentts_available() is False
class TestDispatcherBranch:
def test_kittentts_not_installed_returns_helpful_error(self, monkeypatch, tmp_path):
"""When provider=kittentts but package missing, return JSON error with setup hint."""
import sys
monkeypatch.setitem(sys.modules, "kittentts", None)
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
from tools.tts_tool import text_to_speech_tool
# Write a config telling it to use kittentts
import yaml
(tmp_path / "config.yaml").write_text(
yaml.safe_dump({"tts": {"provider": "kittentts"}})
)
result = json.loads(text_to_speech_tool(text="Hello"))
assert result["success"] is False
assert "kittentts" in result["error"].lower()
assert "hermes setup tts" in result["error"].lower()

View file

@ -933,6 +933,58 @@ class TestEnableVoiceModeReal:
assert cli._voice_mode is True
class TestVoiceBeepConfigReal:
"""Tests the CLI voice beep toggle."""
@patch("hermes_cli.config.load_config", return_value={"voice": {}})
def test_beeps_enabled_by_default(self, _cfg):
cli = _make_voice_cli()
assert cli._voice_beeps_enabled() is True
@patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
def test_beeps_can_be_disabled(self, _cfg):
cli = _make_voice_cli()
assert cli._voice_beeps_enabled() is False
@patch("cli._cprint")
@patch("cli.threading.Thread")
@patch("tools.voice_mode.play_beep")
@patch("tools.voice_mode.create_audio_recorder")
@patch(
"tools.voice_mode.check_voice_requirements",
return_value={
"available": True,
"audio_available": True,
"stt_available": True,
"details": "OK",
"missing_packages": [],
},
)
@patch(
"hermes_cli.config.load_config",
return_value={
"voice": {
"beep_enabled": False,
"silence_threshold": 200,
"silence_duration": 3.0,
}
},
)
def test_start_recording_skips_beep_when_disabled(
self, _cfg, _req, mock_create, mock_beep, mock_thread, _cp
):
recorder = MagicMock()
recorder.supports_silence_autostop = True
mock_create.return_value = recorder
mock_thread.return_value = MagicMock(start=MagicMock())
cli = _make_voice_cli()
cli._voice_start_recording()
recorder.start.assert_called_once()
mock_beep.assert_not_called()
class TestDisableVoiceModeReal:
"""Tests _disable_voice_mode with real CLI instance."""
@ -1087,6 +1139,16 @@ class TestVoiceStopAndTranscribeReal:
cli._voice_stop_and_transcribe()
assert cli._pending_input.empty()
@patch("cli._cprint")
@patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
@patch("tools.voice_mode.play_beep")
def test_no_speech_detected_skips_beep_when_disabled(self, mock_beep, _cfg, _cp):
recorder = MagicMock()
recorder.stop.return_value = None
cli = _make_voice_cli(_voice_recording=True, _voice_recorder=recorder)
cli._voice_stop_and_transcribe()
mock_beep.assert_not_called()
@patch("cli._cprint")
@patch("cli.os.unlink")
@patch("cli.os.path.isfile", return_value=True)

View file

@ -44,16 +44,59 @@ def _get_allowed() -> set[str]:
_config_passthrough: frozenset[str] | None = None
def _is_hermes_provider_credential(name: str) -> bool:
"""True if ``name`` is a Hermes-managed provider credential (API key,
token, or similar) per ``_HERMES_PROVIDER_ENV_BLOCKLIST``.
Skill-declared ``required_environment_variables`` frontmatter must
not be able to override this list that was the bypass in
GHSA-rhgp-j443-p4rf where a malicious skill registered
``ANTHROPIC_TOKEN`` / ``OPENAI_API_KEY`` as passthrough and received
the credential in the ``execute_code`` child process, defeating the
sandbox's scrubbing guarantee.
Non-Hermes API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT
in the blocklist and remain legitimately registerable skills that
wrap third-party APIs still work.
"""
try:
from tools.environments.local import _HERMES_PROVIDER_ENV_BLOCKLIST
except Exception:
return False
return name in _HERMES_PROVIDER_ENV_BLOCKLIST
def register_env_passthrough(var_names: Iterable[str]) -> None:
"""Register environment variable names as allowed in sandboxed environments.
Typically called when a skill declares ``required_environment_variables``.
Variables that are Hermes-managed provider credentials (from
``_HERMES_PROVIDER_ENV_BLOCKLIST``) are rejected here to preserve
the ``execute_code`` sandbox's credential-scrubbing guarantee per
GHSA-rhgp-j443-p4rf. A skill that needs to talk to a Hermes-managed
provider should do so via the agent's main-process tools (web_search,
web_extract, etc.) where the credential remains safely in the main
process.
Non-Hermes third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.)
pass through normally they were never in the sandbox scrub list.
"""
for name in var_names:
name = name.strip()
if name:
_get_allowed().add(name)
logger.debug("env passthrough: registered %s", name)
if not name:
continue
if _is_hermes_provider_credential(name):
logger.warning(
"env passthrough: refusing to register Hermes provider "
"credential %r (blocked by _HERMES_PROVIDER_ENV_BLOCKLIST). "
"Skills must not override the execute_code sandbox's "
"credential scrubbing; see GHSA-rhgp-j443-p4rf.",
name,
)
continue
_get_allowed().add(name)
logger.debug("env passthrough: registered %s", name)
def _load_config_passthrough() -> frozenset[str]:

View file

@ -213,6 +213,77 @@ def _make_run_env(env: dict) -> dict:
return run_env
def _read_terminal_shell_init_config() -> tuple[list[str], bool]:
"""Return (shell_init_files, auto_source_bashrc) from config.yaml.
Best-effort returns sensible defaults on any failure so terminal
execution never breaks because the config file is unreadable.
"""
try:
from hermes_cli.config import load_config
cfg = load_config() or {}
terminal_cfg = cfg.get("terminal") or {}
files = terminal_cfg.get("shell_init_files") or []
if not isinstance(files, list):
files = []
auto_bashrc = bool(terminal_cfg.get("auto_source_bashrc", True))
return [str(f) for f in files if f], auto_bashrc
except Exception:
return [], True
def _resolve_shell_init_files() -> list[str]:
"""Resolve the list of files to source before the login-shell snapshot.
Expands ``~`` and ``${VAR}`` references and drops anything that doesn't
exist on disk, so a missing ``~/.bashrc`` never breaks the snapshot.
The ``auto_source_bashrc`` path runs only when the user hasn't supplied
an explicit list once they have, Hermes trusts them.
"""
explicit, auto_bashrc = _read_terminal_shell_init_config()
candidates: list[str] = []
if explicit:
candidates.extend(explicit)
elif auto_bashrc and not _IS_WINDOWS:
# Bash's login-shell invocation does NOT source ~/.bashrc by default,
# so tools like nvm / asdf / pyenv that self-install there stay
# invisible to the snapshot without this nudge.
candidates.append("~/.bashrc")
resolved: list[str] = []
for raw in candidates:
try:
path = os.path.expandvars(os.path.expanduser(raw))
except Exception:
continue
if path and os.path.isfile(path):
resolved.append(path)
return resolved
def _prepend_shell_init(cmd_string: str, files: list[str]) -> str:
"""Prepend ``source <file>`` lines (guarded + silent) to a bash script.
Each file is wrapped so a failing rc file doesn't abort the whole
bootstrap: ``set +e`` keeps going on errors, ``2>/dev/null`` hides
noisy prompts, and ``|| true`` neutralises the exit status.
"""
if not files:
return cmd_string
prelude_parts = ["set +e"]
for path in files:
# shlex.quote isn't available here without an import; the files list
# comes from os.path.expanduser output so it's a concrete absolute
# path. Escape single quotes defensively anyway.
safe = path.replace("'", "'\\''")
prelude_parts.append(f"[ -r '{safe}' ] && . '{safe}' 2>/dev/null || true")
prelude = "\n".join(prelude_parts) + "\n"
return prelude + cmd_string
class LocalEnvironment(BaseEnvironment):
"""Run commands directly on the host machine.
@ -255,6 +326,16 @@ class LocalEnvironment(BaseEnvironment):
timeout: int = 120,
stdin_data: str | None = None) -> subprocess.Popen:
bash = _find_bash()
# For login-shell invocations (used by init_session to build the
# environment snapshot), prepend sources for the user's bashrc /
# custom init files so tools registered outside bash_profile
# (nvm, asdf, pyenv, …) end up on PATH in the captured snapshot.
# Non-login invocations are already sourcing the snapshot and
# don't need this.
if login:
init_files = _resolve_shell_init_files()
if init_files:
cmd_string = _prepend_shell_init(cmd_string, init_files)
args = [bash, "-l", "-c", cmd_string] if login else [bash, "-c", cmd_string]
run_env = _make_run_env(self.env)

View file

@ -35,6 +35,13 @@ from pathlib import Path
from hermes_constants import get_hermes_home
from tools.binary_extensions import BINARY_EXTENSIONS
from agent.file_safety import (
build_write_denied_paths,
build_write_denied_prefixes,
get_safe_write_root as _shared_get_safe_write_root,
is_write_denied as _shared_is_write_denied,
)
# ---------------------------------------------------------------------------
# Write-path deny list — blocks writes to sensitive system/credential files
@ -42,41 +49,9 @@ from tools.binary_extensions import BINARY_EXTENSIONS
_HOME = str(Path.home())
WRITE_DENIED_PATHS = {
os.path.realpath(p) for p in [
os.path.join(_HOME, ".ssh", "authorized_keys"),
os.path.join(_HOME, ".ssh", "id_rsa"),
os.path.join(_HOME, ".ssh", "id_ed25519"),
os.path.join(_HOME, ".ssh", "config"),
str(get_hermes_home() / ".env"),
os.path.join(_HOME, ".bashrc"),
os.path.join(_HOME, ".zshrc"),
os.path.join(_HOME, ".profile"),
os.path.join(_HOME, ".bash_profile"),
os.path.join(_HOME, ".zprofile"),
os.path.join(_HOME, ".netrc"),
os.path.join(_HOME, ".pgpass"),
os.path.join(_HOME, ".npmrc"),
os.path.join(_HOME, ".pypirc"),
"/etc/sudoers",
"/etc/passwd",
"/etc/shadow",
]
}
WRITE_DENIED_PATHS = build_write_denied_paths(_HOME)
WRITE_DENIED_PREFIXES = [
os.path.realpath(p) + os.sep for p in [
os.path.join(_HOME, ".ssh"),
os.path.join(_HOME, ".aws"),
os.path.join(_HOME, ".gnupg"),
os.path.join(_HOME, ".kube"),
"/etc/sudoers.d",
"/etc/systemd",
os.path.join(_HOME, ".docker"),
os.path.join(_HOME, ".azure"),
os.path.join(_HOME, ".config", "gh"),
]
]
WRITE_DENIED_PREFIXES = build_write_denied_prefixes(_HOME)
def _get_safe_write_root() -> Optional[str]:
@ -87,33 +62,12 @@ def _get_safe_write_root() -> Optional[str]:
not on the static deny list. Opt-in hardening for gateway/messaging
deployments that should only touch a workspace checkout.
"""
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
if not root:
return None
try:
return os.path.realpath(os.path.expanduser(root))
except Exception:
return None
return _shared_get_safe_write_root()
def _is_write_denied(path: str) -> bool:
"""Return True if path is on the write deny list."""
resolved = os.path.realpath(os.path.expanduser(str(path)))
# 1) Static deny list
if resolved in WRITE_DENIED_PATHS:
return True
for prefix in WRITE_DENIED_PREFIXES:
if resolved.startswith(prefix):
return True
# 2) Optional safe-root sandbox
safe_root = _get_safe_write_root()
if safe_root:
if not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
return True
return False
return _shared_is_write_denied(path)
# =============================================================================
@ -784,12 +738,14 @@ class ShellFileOperations(FileOperations):
content, old_string, new_string, replace_all
)
if error:
return PatchResult(error=error)
if match_count == 0:
return PatchResult(error=f"Could not find match for old_string in {path}")
if error or match_count == 0:
err_msg = error or f"Could not find match for old_string in {path}"
try:
from tools.fuzzy_match import format_no_match_hint
err_msg += format_no_match_hint(err_msg, match_count, old_string, content)
except Exception:
pass
return PatchResult(error=err_msg)
# Write back
write_result = self.write_file(path, new_content)
if write_result.error:

View file

@ -7,6 +7,9 @@ import logging
import os
import threading
from pathlib import Path
from typing import Optional
from agent.file_safety import get_read_block_error
from tools.binary_extensions import has_binary_extension
from tools.file_operations import ShellFileOperations
from agent.redact import redact_sensitive_text
@ -373,24 +376,9 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
# ── Hermes internal path guard ────────────────────────────────
# Prevent prompt injection via catalog or hub metadata files.
from hermes_constants import get_hermes_home as _get_hh
_hermes_home = _get_hh().resolve()
_blocked_dirs = [
_hermes_home / "skills" / ".hub" / "index-cache",
_hermes_home / "skills" / ".hub",
]
for _blocked in _blocked_dirs:
try:
_resolved.relative_to(_blocked)
return json.dumps({
"error": (
f"Access denied: {path} is an internal Hermes cache file "
"and cannot be read directly to prevent prompt injection. "
"Use the skills_list or skill_view tools instead."
)
})
except ValueError:
pass
block_error = get_read_block_error(path)
if block_error:
return json.dumps({"error": block_error})
# ── Dedup check ───────────────────────────────────────────────
# If we already read this exact (path, offset, limit) and the
@ -682,8 +670,11 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
result_json = json.dumps(result_dict, ensure_ascii=False)
# Hint when old_string not found — saves iterations where the agent
# retries with stale content instead of re-reading the file.
# Suppressed when patch_replace already attached a rich "Did you mean?"
# snippet (which is strictly more useful than the generic hint).
if result_dict.get("error") and "Could not find" in str(result_dict["error"]):
result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]"
if "Did you mean one of these sections?" not in str(result_dict["error"]):
result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]"
return result_json
except Exception as e:
return tool_error(str(e))

View file

@ -619,3 +619,86 @@ def _map_normalized_positions(original: str, normalized: str,
original_matches.append((orig_start, min(orig_end, len(original))))
return original_matches
def find_closest_lines(old_string: str, content: str, context_lines: int = 2, max_results: int = 3) -> str:
"""Find lines in content most similar to old_string for "did you mean?" feedback.
Returns a formatted string showing the closest matching lines with context,
or empty string if no useful match is found.
"""
if not old_string or not content:
return ""
old_lines = old_string.splitlines()
content_lines = content.splitlines()
if not old_lines or not content_lines:
return ""
# Use first line of old_string as anchor for search
anchor = old_lines[0].strip()
if not anchor:
# Try second line if first is blank
candidates = [l.strip() for l in old_lines if l.strip()]
if not candidates:
return ""
anchor = candidates[0]
# Score each line in content by similarity to anchor
scored = []
for i, line in enumerate(content_lines):
stripped = line.strip()
if not stripped:
continue
ratio = SequenceMatcher(None, anchor, stripped).ratio()
if ratio > 0.3:
scored.append((ratio, i))
if not scored:
return ""
# Take top matches
scored.sort(key=lambda x: -x[0])
top = scored[:max_results]
parts = []
seen_ranges = set()
for _, line_idx in top:
start = max(0, line_idx - context_lines)
end = min(len(content_lines), line_idx + len(old_lines) + context_lines)
key = (start, end)
if key in seen_ranges:
continue
seen_ranges.add(key)
snippet = "\n".join(
f"{start + j + 1:4d}| {content_lines[start + j]}"
for j in range(end - start)
)
parts.append(snippet)
if not parts:
return ""
return "\n---\n".join(parts)
def format_no_match_hint(error: Optional[str], match_count: int,
old_string: str, content: str) -> str:
"""Return a '\\n\\nDid you mean...' snippet for plain no-match errors.
Gated so the hint only fires for actual "old_string not found" failures.
Ambiguous-match ("Found N matches"), escape-drift, and identical-strings
errors all have ``match_count == 0`` but a "did you mean?" snippet would
be misleading those failed for unrelated reasons.
Returns an empty string when there's nothing useful to append.
"""
if match_count != 0:
return ""
if not error or not error.startswith("Could not find"):
return ""
hint = find_closest_lines(old_string, content)
if not hint:
return ""
return "\n\nDid you mean one of these sections?\n" + hint

Some files were not shown because too many files have changed in this diff Show more