mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-21 10:22:18 +00:00
Merge remote-tracking branch 'origin/main' into sid/types-and-lints
# Conflicts: # gateway/platforms/base.py # gateway/platforms/qqbot/adapter.py # gateway/platforms/slack.py # hermes_cli/main.py # scripts/batch_runner.py # tools/skills_tool.py # uv.lock
This commit is contained in:
commit
a9ed7cb3b4
117 changed files with 7791 additions and 611 deletions
49
AGENTS.md
49
AGENTS.md
|
|
@ -566,3 +566,52 @@ python -m pytest tests/ -q -n 4
|
|||
Worker count above 4 will surface test-ordering flakes that CI never sees.
|
||||
|
||||
Always run the full suite before pushing changes.
|
||||
|
||||
### Don't write change-detector tests
|
||||
|
||||
A test is a **change-detector** if it fails whenever data that is **expected
|
||||
to change** gets updated — model catalogs, config version numbers,
|
||||
enumeration counts, hardcoded lists of provider models. These tests add no
|
||||
behavioral coverage; they just guarantee that routine source updates break
|
||||
CI and cost engineering time to "fix."
|
||||
|
||||
**Do not write:**
|
||||
|
||||
```python
|
||||
# catalog snapshot — breaks every model release
|
||||
assert "gemini-2.5-pro" in _PROVIDER_MODELS["gemini"]
|
||||
assert "MiniMax-M2.7" in models
|
||||
|
||||
# config version literal — breaks every schema bump
|
||||
assert DEFAULT_CONFIG["_config_version"] == 21
|
||||
|
||||
# enumeration count — breaks every time a skill/provider is added
|
||||
assert len(_PROVIDER_MODELS["huggingface"]) == 8
|
||||
```
|
||||
|
||||
**Do write:**
|
||||
|
||||
```python
|
||||
# behavior: does the catalog plumbing work at all?
|
||||
assert "gemini" in _PROVIDER_MODELS
|
||||
assert len(_PROVIDER_MODELS["gemini"]) >= 1
|
||||
|
||||
# behavior: does migration bump the user's version to current latest?
|
||||
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
|
||||
|
||||
# invariant: no plan-only model leaks into the legacy list
|
||||
assert not (set(moonshot_models) & coding_plan_only_models)
|
||||
|
||||
# invariant: every model in the catalog has a context-length entry
|
||||
for m in _PROVIDER_MODELS["huggingface"]:
|
||||
assert m.lower() in DEFAULT_CONTEXT_LENGTHS_LOWER
|
||||
```
|
||||
|
||||
The rule: if the test reads like a snapshot of current data, delete it. If
|
||||
it reads like a contract about how two pieces of data must relate, keep it.
|
||||
When a PR adds a new provider/model and you want a test, make the test
|
||||
assert the relationship (e.g. "catalog entries all have context lengths"),
|
||||
not the specific names.
|
||||
|
||||
Reviewers should reject new change-detector tests; authors should convert
|
||||
them into invariants before re-requesting review.
|
||||
|
|
|
|||
|
|
@ -63,6 +63,9 @@ def make_approval_callback(
|
|||
logger.warning("Permission request timed out or failed: %s", exc)
|
||||
return "deny"
|
||||
|
||||
if response is None:
|
||||
return "deny"
|
||||
|
||||
outcome = response.outcome
|
||||
if isinstance(outcome, AllowedOutcome):
|
||||
option_id = outcome.option_id
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from __future__ import annotations
|
|||
|
||||
import asyncio
|
||||
import logging
|
||||
import os
|
||||
from collections import defaultdict, deque
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from typing import Any, Deque, Optional
|
||||
|
|
@ -51,7 +52,7 @@ try:
|
|||
except ImportError:
|
||||
from acp.schema import AuthMethod as AuthMethodAgent # type: ignore[attr-defined]
|
||||
|
||||
from acp_adapter.auth import detect_provider, has_provider
|
||||
from acp_adapter.auth import detect_provider
|
||||
from acp_adapter.events import (
|
||||
make_message_cb,
|
||||
make_step_cb,
|
||||
|
|
@ -71,6 +72,11 @@ except Exception:
|
|||
# Thread pool for running AIAgent (synchronous) in parallel.
|
||||
_executor = ThreadPoolExecutor(max_workers=4, thread_name_prefix="acp-agent")
|
||||
|
||||
# Server-side page size for list_sessions. The ACP ListSessionsRequest schema
|
||||
# does not expose a client-side limit, so this is a fixed cap that clients
|
||||
# paginate against using `cursor` / `next_cursor`.
|
||||
_LIST_SESSIONS_PAGE_SIZE = 50
|
||||
|
||||
|
||||
def _extract_text(
|
||||
prompt: list[
|
||||
|
|
@ -351,9 +357,18 @@ class HermesACPAgent(acp.Agent):
|
|||
)
|
||||
|
||||
async def authenticate(self, method_id: str, **kwargs: Any) -> AuthenticateResponse | None:
|
||||
if has_provider():
|
||||
return AuthenticateResponse()
|
||||
return None
|
||||
# Only accept authenticate() calls whose method_id matches the
|
||||
# provider we advertised in initialize(). Without this check,
|
||||
# authenticate() would acknowledge any method_id as long as the
|
||||
# server has provider credentials configured — harmless under
|
||||
# Hermes' threat model (ACP is stdio-only, local-trust), but poor
|
||||
# API hygiene and confusing if ACP ever grows multi-method auth.
|
||||
provider = detect_provider()
|
||||
if not provider:
|
||||
return None
|
||||
if not isinstance(method_id, str) or method_id.strip().lower() != provider:
|
||||
return None
|
||||
return AuthenticateResponse()
|
||||
|
||||
# ---- Session management -------------------------------------------------
|
||||
|
||||
|
|
@ -437,7 +452,28 @@ class HermesACPAgent(acp.Agent):
|
|||
cwd: str | None = None,
|
||||
**kwargs: Any,
|
||||
) -> ListSessionsResponse:
|
||||
"""List ACP sessions with optional ``cwd`` filtering and cursor pagination.
|
||||
|
||||
``cwd`` is passed through to ``SessionManager.list_sessions`` which already
|
||||
normalizes and filters by working directory. ``cursor`` is a ``session_id``
|
||||
previously returned as ``next_cursor``; results resume after that entry.
|
||||
Server-side page size is capped at ``_LIST_SESSIONS_PAGE_SIZE``; when more
|
||||
results remain, ``next_cursor`` is set to the last returned ``session_id``.
|
||||
"""
|
||||
infos = self.session_manager.list_sessions(cwd=cwd)
|
||||
|
||||
if cursor:
|
||||
for idx, s in enumerate(infos):
|
||||
if s["session_id"] == cursor:
|
||||
infos = infos[idx + 1:]
|
||||
break
|
||||
else:
|
||||
# Unknown cursor -> empty page (do not fall back to full list).
|
||||
infos = []
|
||||
|
||||
has_more = len(infos) > _LIST_SESSIONS_PAGE_SIZE
|
||||
infos = infos[:_LIST_SESSIONS_PAGE_SIZE]
|
||||
|
||||
sessions = []
|
||||
for s in infos:
|
||||
updated_at = s.get("updated_at")
|
||||
|
|
@ -451,7 +487,9 @@ class HermesACPAgent(acp.Agent):
|
|||
updated_at=updated_at,
|
||||
)
|
||||
)
|
||||
return ListSessionsResponse(sessions=sessions)
|
||||
|
||||
next_cursor = sessions[-1].session_id if has_more and sessions else None
|
||||
return ListSessionsResponse(sessions=sessions, next_cursor=next_cursor)
|
||||
|
||||
# ---- Prompt (core) ------------------------------------------------------
|
||||
|
||||
|
|
@ -517,15 +555,32 @@ class HermesACPAgent(acp.Agent):
|
|||
agent.step_callback = step_cb
|
||||
agent.message_callback = message_cb
|
||||
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
previous_approval_cb = getattr(_terminal_tool, "_approval_callback", None)
|
||||
_terminal_tool.set_approval_callback(approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not set ACP approval callback", exc_info=True)
|
||||
# Approval callback is per-thread (thread-local, GHSA-qg5c-hvr5-hjgr).
|
||||
# Set it INSIDE _run_agent so the TLS write happens in the executor
|
||||
# thread — setting it here would write to the event-loop thread's TLS,
|
||||
# not the executor's. Also set HERMES_INTERACTIVE so approval.py
|
||||
# takes the CLI-interactive path (which calls the registered
|
||||
# callback via prompt_dangerous_approval) instead of the
|
||||
# non-interactive auto-approve branch (GHSA-96vc-wcxf-jjff).
|
||||
# ACP's conn.request_permission maps cleanly to the interactive
|
||||
# callback shape — not the gateway-queue HERMES_EXEC_ASK path,
|
||||
# which requires a notify_cb registered in _gateway_notify_cbs.
|
||||
previous_approval_cb = None
|
||||
previous_interactive = None
|
||||
|
||||
def _run_agent() -> dict:
|
||||
nonlocal previous_approval_cb, previous_interactive
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
previous_approval_cb = _terminal_tool._get_approval_callback()
|
||||
_terminal_tool.set_approval_callback(approval_cb)
|
||||
except Exception:
|
||||
logger.debug("Could not set ACP approval callback", exc_info=True)
|
||||
# Signal to tools.approval that we have an interactive callback
|
||||
# and the non-interactive auto-approve path must not fire.
|
||||
previous_interactive = os.environ.get("HERMES_INTERACTIVE")
|
||||
os.environ["HERMES_INTERACTIVE"] = "1"
|
||||
try:
|
||||
result = agent.run_conversation(
|
||||
user_message=user_text,
|
||||
|
|
@ -537,6 +592,11 @@ class HermesACPAgent(acp.Agent):
|
|||
logger.exception("Agent error in session %s", session_id)
|
||||
return {"final_response": f"Error: {e}", "messages": state.history}
|
||||
finally:
|
||||
# Restore HERMES_INTERACTIVE.
|
||||
if previous_interactive is None:
|
||||
os.environ.pop("HERMES_INTERACTIVE", None)
|
||||
else:
|
||||
os.environ["HERMES_INTERACTIVE"] = previous_interactive
|
||||
if approval_cb:
|
||||
try:
|
||||
from tools import terminal_tool as _terminal_tool
|
||||
|
|
|
|||
326
agent/account_usage.py
Normal file
326
agent/account_usage.py
Normal file
|
|
@ -0,0 +1,326 @@
|
|||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Optional
|
||||
|
||||
import httpx
|
||||
|
||||
from agent.anthropic_adapter import _is_oauth_token, resolve_anthropic_token
|
||||
from hermes_cli.auth import _read_codex_tokens, resolve_codex_runtime_credentials
|
||||
from hermes_cli.runtime_provider import resolve_runtime_provider
|
||||
|
||||
|
||||
def _utc_now() -> datetime:
|
||||
return datetime.now(timezone.utc)
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AccountUsageWindow:
    """A single usage/quota window for an account (immutable)."""

    # Display name of the window, e.g. "Session" or "Weekly".
    label: str
    # Share of the window already consumed, as a percentage; None when unknown.
    used_percent: Optional[float] = None
    # When the window resets; None when no reset time is known.
    reset_at: Optional[datetime] = None
    # Free-form extra text for the window.
    detail: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class AccountUsageSnapshot:
    """Immutable record of one provider's account usage at fetch time."""

    # Provider identifier, e.g. "anthropic" or "openrouter".
    provider: str
    # Name of the API/mechanism the data came from.
    source: str
    # When the snapshot was taken.
    fetched_at: datetime
    # Heading used when the snapshot is rendered.
    title: str = "Account limits"
    # Plan name, when one is known.
    plan: Optional[str] = None
    # Per-window usage entries.
    windows: tuple[AccountUsageWindow, ...] = ()
    # Extra pre-formatted lines (balances, totals, ...).
    details: tuple[str, ...] = ()
    # Set when usage data cannot be provided; explains why.
    unavailable_reason: Optional[str] = None

    @property
    def available(self) -> bool:
        """Whether the snapshot carries data and is not marked unavailable."""
        if self.unavailable_reason:
            return False
        return bool(self.windows or self.details)
|
||||
|
||||
|
||||
def _title_case_slug(value: Optional[str]) -> Optional[str]:
|
||||
cleaned = str(value or "").strip()
|
||||
if not cleaned:
|
||||
return None
|
||||
return cleaned.replace("_", " ").replace("-", " ").title()
|
||||
|
||||
|
||||
def _parse_dt(value: Any) -> Optional[datetime]:
|
||||
if value in (None, ""):
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
return datetime.fromtimestamp(float(value), tz=timezone.utc)
|
||||
if isinstance(value, str):
|
||||
text = value.strip()
|
||||
if not text:
|
||||
return None
|
||||
if text.endswith("Z"):
|
||||
text = text[:-1] + "+00:00"
|
||||
try:
|
||||
dt = datetime.fromisoformat(text)
|
||||
return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
|
||||
except ValueError:
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _format_reset(dt: Optional[datetime]) -> str:
|
||||
if not dt:
|
||||
return "unknown"
|
||||
local_dt = dt.astimezone()
|
||||
delta = dt - _utc_now()
|
||||
total_seconds = int(delta.total_seconds())
|
||||
if total_seconds <= 0:
|
||||
return f"now ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
|
||||
hours, rem = divmod(total_seconds, 3600)
|
||||
minutes = rem // 60
|
||||
if hours >= 24:
|
||||
days, hours = divmod(hours, 24)
|
||||
rel = f"in {days}d {hours}h"
|
||||
elif hours > 0:
|
||||
rel = f"in {hours}h {minutes}m"
|
||||
else:
|
||||
rel = f"in {minutes}m"
|
||||
return f"{rel} ({local_dt.strftime('%Y-%m-%d %H:%M %Z')})"
|
||||
|
||||
|
||||
def render_account_usage_lines(snapshot: Optional[AccountUsageSnapshot], *, markdown: bool = False) -> list[str]:
    """Render a usage snapshot as a list of display lines.

    Args:
        snapshot: The snapshot to render; a falsy value yields ``[]``.
        markdown: When true, the title is wrapped in ``**`` for bold.

    Returns:
        Header line, provider line, one line per window, the snapshot's
        detail lines, and finally an "Unavailable: ..." line when a reason
        is set.
    """
    if not snapshot:
        return []
    emphasis = "**" if markdown else ""
    provider_line = f"Provider: {snapshot.provider}"
    if snapshot.plan:
        provider_line += f" ({snapshot.plan})"
    lines = [f"📈 {emphasis}{snapshot.title}{emphasis}", provider_line]
    for window in snapshot.windows:
        if window.used_percent is None:
            entry = f"{window.label}: unavailable"
        else:
            pct = float(window.used_percent)
            # Both figures are floored at 0 so odd inputs never render negative.
            left = max(0, round(100 - pct))
            spent = max(0, round(pct))
            entry = f"{window.label}: {left}% remaining ({spent}% used)"
        # A reset time takes precedence over the free-form detail text.
        if window.reset_at:
            entry += f" • resets {_format_reset(window.reset_at)}"
        elif window.detail:
            entry += f" • {window.detail}"
        lines.append(entry)
    lines.extend(snapshot.details)
    if snapshot.unavailable_reason:
        lines.append(f"Unavailable: {snapshot.unavailable_reason}")
    return lines
|
||||
|
||||
|
||||
def _resolve_codex_usage_url(base_url: str) -> str:
|
||||
normalized = (base_url or "").strip().rstrip("/")
|
||||
if not normalized:
|
||||
normalized = "https://chatgpt.com/backend-api/codex"
|
||||
if normalized.endswith("/codex"):
|
||||
normalized = normalized[: -len("/codex")]
|
||||
if "/backend-api" in normalized:
|
||||
return normalized + "/wham/usage"
|
||||
return normalized + "/api/codex/usage"
|
||||
|
||||
|
||||
def _fetch_codex_account_usage() -> Optional[AccountUsageSnapshot]:
    """Fetch Codex rate-limit windows and credit balance from the usage API.

    The bearer token and base URL come from the resolved Codex runtime
    credentials; the stored ``account_id`` (when present) is sent as the
    ``ChatGPT-Account-Id`` header. HTTP and parsing errors are not handled
    here and propagate to the caller.
    """
    creds = resolve_codex_runtime_credentials(refresh_if_expiring=True)
    stored_tokens = _read_codex_tokens().get("tokens") or {}
    account_id = str(stored_tokens.get("account_id", "") or "").strip() or None

    headers = {
        "Authorization": f"Bearer {creds['api_key']}",
        "Accept": "application/json",
        "User-Agent": "codex-cli",
    }
    if account_id:
        headers["ChatGPT-Account-Id"] = account_id

    with httpx.Client(timeout=15.0) as client:
        response = client.get(_resolve_codex_usage_url(creds.get("base_url", "")), headers=headers)
        response.raise_for_status()
        payload = response.json() or {}

    rate_limit = payload.get("rate_limit") or {}
    windows: list[AccountUsageWindow] = []
    for key, label in (("primary_window", "Session"), ("secondary_window", "Weekly")):
        entry = rate_limit.get(key) or {}
        used = entry.get("used_percent")
        # Windows without a usage figure are left out entirely.
        if used is not None:
            windows.append(
                AccountUsageWindow(
                    label=label,
                    used_percent=float(used),
                    reset_at=_parse_dt(entry.get("reset_at")),
                )
            )

    details: list[str] = []
    credits = payload.get("credits") or {}
    if credits.get("has_credits"):
        balance = credits.get("balance")
        # NOTE(review): the "unlimited" check is read as nested under
        # has_credits (fallback for a non-numeric balance) — confirm.
        if isinstance(balance, (int, float)):
            details.append(f"Credits balance: ${float(balance):.2f}")
        elif credits.get("unlimited"):
            details.append("Credits balance: unlimited")

    return AccountUsageSnapshot(
        provider="openai-codex",
        source="usage_api",
        fetched_at=_utc_now(),
        plan=_title_case_slug(payload.get("plan_type")),
        windows=tuple(windows),
        details=tuple(details),
    )
|
||||
|
||||
|
||||
def _fetch_anthropic_account_usage() -> Optional[AccountUsageSnapshot]:
    """Fetch usage windows for an OAuth-backed Anthropic account.

    Returns None when no token resolves. For a non-OAuth token, returns a
    snapshot whose ``unavailable_reason`` explains that limits are OAuth-only.
    Otherwise queries the OAuth usage endpoint; HTTP and parsing errors
    propagate to the caller.
    """
    token = (resolve_anthropic_token() or "").strip()
    if not token:
        return None
    if not _is_oauth_token(token):
        return AccountUsageSnapshot(
            provider="anthropic",
            source="oauth_usage_api",
            fetched_at=_utc_now(),
            unavailable_reason="Anthropic account limits are only available for OAuth-backed Claude accounts.",
        )

    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
        "Content-Type": "application/json",
        "anthropic-beta": "oauth-2025-04-20",
        "User-Agent": "claude-code/2.1.0",
    }
    with httpx.Client(timeout=15.0) as client:
        response = client.get("https://api.anthropic.com/api/oauth/usage", headers=headers)
        response.raise_for_status()
        payload = response.json() or {}

    window_labels = (
        ("five_hour", "Current session"),
        ("seven_day", "Current week"),
        ("seven_day_opus", "Opus week"),
        ("seven_day_sonnet", "Sonnet week"),
    )
    windows: list[AccountUsageWindow] = []
    for key, label in window_labels:
        entry = payload.get(key) or {}
        util = entry.get("utilization")
        if util is None:
            continue
        level = float(util)
        # Values <= 1 are taken to be fractions and scaled to a percentage;
        # anything larger is used as a percentage as-is.
        used = level * 100 if level <= 1 else level
        windows.append(
            AccountUsageWindow(
                label=label,
                used_percent=used,
                reset_at=_parse_dt(entry.get("resets_at")),
            )
        )

    details: list[str] = []
    extra = payload.get("extra_usage") or {}
    if extra.get("is_enabled"):
        used_credits = extra.get("used_credits")
        monthly_limit = extra.get("monthly_limit")
        currency = extra.get("currency") or "USD"
        both_numeric = isinstance(used_credits, (int, float)) and isinstance(monthly_limit, (int, float))
        if both_numeric:
            details.append(
                f"Extra usage: {used_credits:.2f} / {monthly_limit:.2f} {currency}"
            )

    return AccountUsageSnapshot(
        provider="anthropic",
        source="oauth_usage_api",
        fetched_at=_utc_now(),
        windows=tuple(windows),
        details=tuple(details),
    )
|
||||
|
||||
|
||||
def _fetch_openrouter_account_usage(base_url: Optional[str], api_key: Optional[str]) -> Optional[AccountUsageSnapshot]:
    """Fetch the OpenRouter credit balance and per-key quota/usage.

    Args:
        base_url: Explicit base URL passed through to runtime resolution.
        api_key: Explicit API key passed through to runtime resolution.

    Returns:
        None when no API key resolves; otherwise a snapshot holding the
        credit balance, an optional "API key quota" window, and an optional
        key-usage detail line. A failing ``/credits`` request propagates;
        a failing ``/key`` request is tolerated and treated as no key data.
    """
    runtime = resolve_runtime_provider(
        requested="openrouter",
        explicit_base_url=base_url,
        explicit_api_key=api_key,
    )
    token = str(runtime.get("api_key", "") or "").strip()
    if not token:
        return None

    root = str(runtime.get("base_url", "") or "").rstrip("/")
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/json",
    }
    with httpx.Client(timeout=10.0) as client:
        credits_resp = client.get(f"{root}/credits", headers=headers)
        credits_resp.raise_for_status()
        credits = (credits_resp.json() or {}).get("data") or {}
        # Key metadata is optional: any failure here just means no quota info.
        try:
            key_resp = client.get(f"{root}/key", headers=headers)
            key_resp.raise_for_status()
            key_data = (key_resp.json() or {}).get("data") or {}
        except Exception:
            key_data = {}

    total_credits = float(credits.get("total_credits") or 0.0)
    total_usage = float(credits.get("total_usage") or 0.0)
    details = [f"Credits balance: ${max(0.0, total_credits - total_usage):.2f}"]

    windows: list[AccountUsageWindow] = []
    limit = key_data.get("limit")
    limit_remaining = key_data.get("limit_remaining")
    limit_reset = str(key_data.get("limit_reset") or "").strip()
    usage = key_data.get("usage")

    # Only report a quota window for a positive limit with a remaining value
    # inside [0, limit]; `and` keeps the float() calls behind the type checks.
    quota_is_sane = (
        isinstance(limit, (int, float))
        and float(limit) > 0
        and isinstance(limit_remaining, (int, float))
        and 0 <= float(limit_remaining) <= float(limit)
    )
    if quota_is_sane:
        limit_value = float(limit)
        remaining_value = float(limit_remaining)
        quota_detail = [f"${remaining_value:.2f} of ${limit_value:.2f} remaining"]
        if limit_reset:
            quota_detail.append(f"resets {limit_reset}")
        windows.append(
            AccountUsageWindow(
                label="API key quota",
                used_percent=((limit_value - remaining_value) / limit_value) * 100,
                detail=" • ".join(quota_detail),
            )
        )

    if isinstance(usage, (int, float)):
        usage_parts = [f"API key usage: ${float(usage):.2f} total"]
        periods = (
            ("usage_daily", "today"),
            ("usage_weekly", "this week"),
            ("usage_monthly", "this month"),
        )
        for field_name, label in periods:
            amount = key_data.get(field_name)
            # Zero or missing period totals are omitted.
            if isinstance(amount, (int, float)) and float(amount) > 0:
                usage_parts.append(f"${float(amount):.2f} {label}")
        details.append(" • ".join(usage_parts))

    return AccountUsageSnapshot(
        provider="openrouter",
        source="credits_api",
        fetched_at=_utc_now(),
        windows=tuple(windows),
        details=tuple(details),
    )
|
||||
|
||||
|
||||
def fetch_account_usage(
    provider: Optional[str],
    *,
    base_url: Optional[str] = None,
    api_key: Optional[str] = None,
) -> Optional[AccountUsageSnapshot]:
    """Fetch an account-usage snapshot for *provider*, best effort.

    Args:
        provider: Provider name; matched case-insensitively after stripping.
            Empty, ``"auto"`` and ``"custom"`` are never looked up.
        base_url: Optional explicit base URL (used by the OpenRouter lookup).
        api_key: Optional explicit API key (used by the OpenRouter lookup).

    Returns:
        The provider's snapshot, or None when the provider is unsupported or
        the lookup fails for any reason. This function does not raise;
        failures are recorded at debug level instead of vanishing silently.
    """
    normalized = str(provider or "").strip().lower()
    if normalized in {"", "auto", "custom"}:
        return None
    try:
        if normalized == "openai-codex":
            return _fetch_codex_account_usage()
        if normalized == "anthropic":
            return _fetch_anthropic_account_usage()
        if normalized == "openrouter":
            return _fetch_openrouter_account_usage(base_url, api_key)
    except Exception:
        # Usage display is optional, so a failed lookup must never break the
        # caller -- but keep a trace so auth/network problems are diagnosable.
        import logging

        logging.getLogger(__name__).debug(
            "Account usage lookup failed for provider %s", normalized, exc_info=True
        )
        return None
    return None
|
||||
|
|
@ -19,6 +19,7 @@ from pathlib import Path
|
|||
from hermes_constants import get_hermes_home
|
||||
from types import SimpleNamespace
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from utils import normalize_proxy_env_vars
|
||||
|
||||
try:
|
||||
import anthropic as _anthropic_sdk
|
||||
|
|
@ -308,6 +309,9 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional
|
|||
"The 'anthropic' package is required for the Anthropic provider. "
|
||||
"Install it with: pip install 'anthropic>=0.39.0'"
|
||||
)
|
||||
|
||||
normalize_proxy_env_vars()
|
||||
|
||||
from httpx import Timeout
|
||||
|
||||
normalized_base_url = _normalize_base_url_text(base_url)
|
||||
|
|
@ -1525,3 +1529,42 @@ def normalize_anthropic_response(
|
|||
),
|
||||
finish_reason,
|
||||
)
|
||||
|
||||
|
||||
def normalize_anthropic_response_v2(
    response,
    strip_tool_prefix: bool = False,
) -> "NormalizedResponse":
    """Adapt ``normalize_anthropic_response()`` output to ``NormalizedResponse``.

    Delegates to the existing normaliser and repackages its assistant message
    and finish reason into the shared transport types, so call sites can be
    migrated one at a time while the original function stays untouched.
    """
    from agent.transports.types import NormalizedResponse, build_tool_call

    assistant_msg, finish_reason = normalize_anthropic_response(response, strip_tool_prefix)

    raw_calls = assistant_msg.tool_calls
    tool_calls = (
        [
            build_tool_call(
                id=call.id,
                name=call.function.name,
                arguments=call.function.arguments,
            )
            for call in raw_calls
        ]
        if raw_calls
        else None
    )

    # provider_data is only populated when reasoning details are present.
    reasoning_details = getattr(assistant_msg, "reasoning_details", None)
    provider_data = {"reasoning_details": reasoning_details} if reasoning_details else None

    return NormalizedResponse(
        content=assistant_msg.content,
        tool_calls=tool_calls,
        finish_reason=finish_reason,
        reasoning=getattr(assistant_msg, "reasoning", None),
        usage=None,  # Anthropic usage is on the raw response, not the normaliser
        provider_data=provider_data,
    )
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ if TYPE_CHECKING:
|
|||
from agent.credential_pool import load_pool
|
||||
from hermes_cli.config import get_hermes_home
|
||||
from hermes_constants import OPENROUTER_BASE_URL
|
||||
from utils import base_url_host_matches, base_url_hostname
|
||||
from utils import base_url_host_matches, base_url_hostname, normalize_proxy_env_vars
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -1036,6 +1036,8 @@ def _validate_proxy_env_urls() -> None:
|
|||
"""
|
||||
from urllib.parse import urlparse
|
||||
|
||||
normalize_proxy_env_vars()
|
||||
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = str(os.environ.get(key) or "").strip()
|
||||
|
|
|
|||
|
|
@ -21,6 +21,9 @@ from pathlib import Path
|
|||
from types import SimpleNamespace
|
||||
from typing import Any
|
||||
|
||||
from agent.file_safety import get_read_block_error, is_write_denied
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
ACP_MARKER_BASE_URL = "acp://copilot"
|
||||
_DEFAULT_TIMEOUT_SECONDS = 900.0
|
||||
|
||||
|
|
@ -54,6 +57,18 @@ def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
|
|||
}
|
||||
|
||||
|
||||
def _permission_denied(message_id: Any) -> dict[str, Any]:
|
||||
return {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"result": {
|
||||
"outcome": {
|
||||
"outcome": "cancelled",
|
||||
}
|
||||
},
|
||||
}
|
||||
|
||||
|
||||
def _format_messages_as_prompt(
|
||||
messages: list[dict[str, Any]],
|
||||
model: str | None = None,
|
||||
|
|
@ -535,18 +550,13 @@ class CopilotACPClient:
|
|||
params = msg.get("params") or {}
|
||||
|
||||
if method == "session/request_permission":
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
"result": {
|
||||
"outcome": {
|
||||
"outcome": "allow_once",
|
||||
}
|
||||
},
|
||||
}
|
||||
response = _permission_denied(message_id)
|
||||
elif method == "fs/read_text_file":
|
||||
try:
|
||||
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
|
||||
block_error = get_read_block_error(str(path))
|
||||
if block_error:
|
||||
raise PermissionError(block_error)
|
||||
content = path.read_text() if path.exists() else ""
|
||||
line = params.get("line")
|
||||
limit = params.get("limit")
|
||||
|
|
@ -555,6 +565,8 @@ class CopilotACPClient:
|
|||
start = line - 1
|
||||
end = start + limit if isinstance(limit, int) and limit > 0 else None
|
||||
content = "".join(lines[start:end])
|
||||
if content:
|
||||
content = redact_sensitive_text(content)
|
||||
response = {
|
||||
"jsonrpc": "2.0",
|
||||
"id": message_id,
|
||||
|
|
@ -567,6 +579,10 @@ class CopilotACPClient:
|
|||
elif method == "fs/write_text_file":
|
||||
try:
|
||||
path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
|
||||
if is_write_denied(str(path)):
|
||||
raise PermissionError(
|
||||
f"Write denied: '{path}' is a protected system/credential file."
|
||||
)
|
||||
path.parent.mkdir(parents=True, exist_ok=True)
|
||||
path.write_text(str(params.get("content") or ""))
|
||||
response = {
|
||||
|
|
|
|||
|
|
@ -998,6 +998,14 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
active_sources: Set[str] = set()
|
||||
auth_store = _load_auth_store()
|
||||
|
||||
# Shared suppression gate — used at every upsert site so
|
||||
# `hermes auth remove <provider> <N>` is stable across all source types.
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed as _is_suppressed
|
||||
except ImportError:
|
||||
def _is_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
|
||||
if provider == "anthropic":
|
||||
# Only auto-discover external credentials (Claude Code, Hermes PKCE)
|
||||
# when the user has explicitly configured anthropic as their provider.
|
||||
|
|
@ -1017,13 +1025,8 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
("claude_code", read_claude_code_credentials()),
|
||||
):
|
||||
if creds and creds.get("accessToken"):
|
||||
# Check if user explicitly removed this source
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed
|
||||
if is_source_suppressed(provider, source_name):
|
||||
continue
|
||||
except ImportError:
|
||||
pass
|
||||
if _is_suppressed(provider, source_name):
|
||||
continue
|
||||
active_sources.add(source_name)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
|
|
@ -1041,7 +1044,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
|
||||
elif provider == "nous":
|
||||
state = _load_provider_state(auth_store, "nous")
|
||||
if state:
|
||||
if state and not _is_suppressed(provider, "device_code"):
|
||||
active_sources.add("device_code")
|
||||
# Prefer a user-supplied label embedded in the singleton state
|
||||
# (set by persist_nous_credentials(label=...) when the user ran
|
||||
|
|
@ -1082,20 +1085,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
token, source = resolve_copilot_token()
|
||||
if token:
|
||||
source_name = "gh_cli" if "gh" in source.lower() else f"env:{source}"
|
||||
active_sources.add(source_name)
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": token,
|
||||
"base_url": pconfig.inference_base_url if pconfig else "",
|
||||
"label": source,
|
||||
},
|
||||
)
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
pconfig = PROVIDER_REGISTRY.get(provider)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": token,
|
||||
"base_url": pconfig.inference_base_url if pconfig else "",
|
||||
"label": source,
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Copilot token seed failed: %s", exc)
|
||||
|
||||
|
|
@ -1111,20 +1115,21 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
token = creds.get("api_key", "")
|
||||
if token:
|
||||
source_name = creds.get("source", "qwen-cli")
|
||||
active_sources.add(source_name)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"access_token": token,
|
||||
"expires_at_ms": creds.get("expires_at_ms"),
|
||||
"base_url": creds.get("base_url", ""),
|
||||
"label": creds.get("auth_file", source_name),
|
||||
},
|
||||
)
|
||||
if not _is_suppressed(provider, source_name):
|
||||
active_sources.add(source_name)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
source_name,
|
||||
{
|
||||
"source": source_name,
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"access_token": token,
|
||||
"expires_at_ms": creds.get("expires_at_ms"),
|
||||
"base_url": creds.get("base_url", ""),
|
||||
"label": creds.get("auth_file", source_name),
|
||||
},
|
||||
)
|
||||
except Exception as exc:
|
||||
logger.debug("Qwen OAuth token seed failed: %s", exc)
|
||||
|
||||
|
|
@ -1133,13 +1138,7 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
# the device_code source as suppressed so it won't be re-seeded from
|
||||
# the Hermes auth store. Without this gate the removal is instantly
|
||||
# undone on the next load_pool() call.
|
||||
codex_suppressed = False
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed
|
||||
codex_suppressed = is_source_suppressed(provider, "device_code")
|
||||
except ImportError:
|
||||
pass
|
||||
if codex_suppressed:
|
||||
if _is_suppressed(provider, "device_code"):
|
||||
return changed, active_sources
|
||||
|
||||
state = _load_provider_state(auth_store, "openai-codex")
|
||||
|
|
@ -1173,10 +1172,22 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool, Set[str]]:
|
||||
changed = False
|
||||
active_sources: Set[str] = set()
|
||||
# Honour user suppression — `hermes auth remove <provider> <N>` for an
|
||||
# env-seeded credential marks the env:<VAR> source as suppressed so it
|
||||
# won't be re-seeded from the user's shell environment or ~/.hermes/.env.
|
||||
# Without this gate the removal is silently undone on the next
|
||||
# load_pool() call whenever the var is still exported by the shell.
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed as _is_source_suppressed
|
||||
except ImportError:
|
||||
def _is_source_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
if provider == "openrouter":
|
||||
token = os.getenv("OPENROUTER_API_KEY", "").strip()
|
||||
if token:
|
||||
source = "env:OPENROUTER_API_KEY"
|
||||
if _is_source_suppressed(provider, source):
|
||||
return changed, active_sources
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
|
|
@ -1213,6 +1224,8 @@ def _seed_from_env(provider: str, entries: List[PooledCredential]) -> Tuple[bool
|
|||
if not token:
|
||||
continue
|
||||
source = f"env:{env_var}"
|
||||
if _is_source_suppressed(provider, source):
|
||||
continue
|
||||
active_sources.add(source)
|
||||
auth_type = AUTH_TYPE_OAUTH if provider == "anthropic" and not token.startswith("sk-ant-api") else AUTH_TYPE_API_KEY
|
||||
base_url = env_url or pconfig.inference_base_url
|
||||
|
|
@ -1257,6 +1270,13 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
|
|||
changed = False
|
||||
active_sources: Set[str] = set()
|
||||
|
||||
# Shared suppression gate — same pattern as _seed_from_env/_seed_from_singletons.
|
||||
try:
|
||||
from hermes_cli.auth import is_source_suppressed as _is_suppressed
|
||||
except ImportError:
|
||||
def _is_suppressed(_p, _s): # type: ignore[misc]
|
||||
return False
|
||||
|
||||
# Seed from the custom_providers config entry's api_key field
|
||||
cp_config = _get_custom_provider_config(pool_key)
|
||||
if cp_config:
|
||||
|
|
@ -1265,19 +1285,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
|
|||
name = str(cp_config.get("name") or "").strip()
|
||||
if api_key:
|
||||
source = f"config:{name}"
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
pool_key,
|
||||
source,
|
||||
{
|
||||
"source": source,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": api_key,
|
||||
"base_url": base_url,
|
||||
"label": name or source,
|
||||
},
|
||||
)
|
||||
if not _is_suppressed(pool_key, source):
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
pool_key,
|
||||
source,
|
||||
{
|
||||
"source": source,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": api_key,
|
||||
"base_url": base_url,
|
||||
"label": name or source,
|
||||
},
|
||||
)
|
||||
|
||||
# Seed from model.api_key if model.provider=='custom' and model.base_url matches
|
||||
try:
|
||||
|
|
@ -1297,19 +1318,20 @@ def _seed_custom_pool(pool_key: str, entries: List[PooledCredential]) -> Tuple[b
|
|||
matched_key = get_custom_provider_pool_key(model_base_url)
|
||||
if matched_key == pool_key:
|
||||
source = "model_config"
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
pool_key,
|
||||
source,
|
||||
{
|
||||
"source": source,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": model_api_key,
|
||||
"base_url": model_base_url,
|
||||
"label": "model_config",
|
||||
},
|
||||
)
|
||||
if not _is_suppressed(pool_key, source):
|
||||
active_sources.add(source)
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
pool_key,
|
||||
source,
|
||||
{
|
||||
"source": source,
|
||||
"auth_type": AUTH_TYPE_API_KEY,
|
||||
"access_token": model_api_key,
|
||||
"base_url": model_base_url,
|
||||
"label": "model_config",
|
||||
},
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
|
|
|||
401
agent/credential_sources.py
Normal file
401
agent/credential_sources.py
Normal file
|
|
@ -0,0 +1,401 @@
|
|||
"""Unified removal contract for every credential source Hermes reads from.
|
||||
|
||||
Hermes seeds its credential pool from many places:
|
||||
|
||||
env:<VAR> — os.environ / ~/.hermes/.env
|
||||
claude_code — ~/.claude/.credentials.json
|
||||
hermes_pkce — ~/.hermes/.anthropic_oauth.json
|
||||
device_code — auth.json providers.<provider> (nous, openai-codex, ...)
|
||||
qwen-cli — ~/.qwen/oauth_creds.json
|
||||
gh_cli — gh auth token
|
||||
config:<name> — custom_providers config entry
|
||||
model_config — model.api_key when model.provider == "custom"
|
||||
manual — user ran `hermes auth add`
|
||||
|
||||
Each source has its own reader inside ``agent.credential_pool._seed_from_*``
|
||||
(which keep their existing shape — we haven't restructured them). What we
|
||||
unify here is **removal**:
|
||||
|
||||
``hermes auth remove <provider> <N>`` must make the pool entry stay gone.
|
||||
|
||||
Before this module, every source had an ad-hoc removal branch in
|
||||
``auth_remove_command``, and several sources had no branch at all — so
|
||||
``auth remove`` silently reverted on the next ``load_pool()`` call for
|
||||
qwen-cli, nous device_code (partial), hermes_pkce, copilot gh_cli, and
|
||||
custom-config sources.
|
||||
|
||||
Now every source registers a ``RemovalStep`` that does exactly three things
|
||||
in the same shape:
|
||||
|
||||
1. Clean up whatever externally-readable state the source reads from
|
||||
(.env line, auth.json block, OAuth file, etc.)
|
||||
2. Suppress the ``(provider, source_id)`` in auth.json so the
|
||||
corresponding ``_seed_from_*`` branch skips the upsert on re-load
|
||||
3. Return ``RemovalResult`` describing what was cleaned and any
|
||||
diagnostic hints the user should see (shell-exported env vars,
|
||||
external credential files we deliberately don't delete, etc.)
|
||||
|
||||
Adding a new credential source is:
|
||||
- wire up a reader branch in ``_seed_from_*`` (existing pattern)
|
||||
- gate that reader behind ``is_source_suppressed(provider, source_id)``
|
||||
- register a ``RemovalStep`` here
|
||||
|
||||
No more per-source if/elif chain in ``auth_remove_command``.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
|
||||
@dataclass
class RemovalResult:
    """What a removal step reports back after handling one credential source.

    Attributes:
        cleaned: One short line per piece of external state that was really
            changed (for example ``"Cleared XAI_API_KEY from .env"``). Shown
            to the user verbatim.
        hints: Informational lines about state that was left alone on
            purpose, or that the user has to tidy up themselves (a variable
            exported by the shell, a credential file owned by another tool).
            Shown to the user verbatim; never describes a destructive action.
        suppress: When True (the default) the caller is expected to record a
            suppression for the source so later pool loads skip it. Entries
            that are not seeded from anything external are the case where
            False makes sense.
    """

    # Fresh list per instance — default_factory avoids a shared mutable default.
    cleaned: List[str] = field(default_factory=list)
    hints: List[str] = field(default_factory=list)
    suppress: bool = True
|
||||
|
||||
|
||||
@dataclass
class RemovalStep:
    """Recipe for cleanly removing one kind of credential source.

    Attributes:
        provider: Pool key of the provider this step belongs to (``"xai"``,
            ``"anthropic"``, ``"nous"``, ...). The wildcard ``"*"`` makes the
            step apply to every provider.
        source_id: The source string as stored on the pooled credential.
            Compared literally unless ``match_fn`` is supplied.
        remove_fn: Callable invoked as ``remove_fn(provider, removed_entry)``;
            performs the cleanup and returns a ``RemovalResult``.
        match_fn: Optional predicate that receives the removed entry's source
            string and replaces the literal ``source_id`` comparison. Used
            for prefix-style families such as ``env:*`` or ``config:*``.
        description: One-line human-readable summary for docs and tests.
    """

    provider: str
    source_id: str
    remove_fn: Callable[..., RemovalResult]
    match_fn: Optional[Callable[[str], bool]] = None
    description: str = ""

    def matches(self, provider: str, source: str) -> bool:
        """Return True when this step handles ``source`` for ``provider``."""
        # Provider gate first: either the wildcard or an exact match.
        if self.provider not in ("*", provider):
            return False
        predicate = self.match_fn
        if predicate is None:
            return source == self.source_id
        return predicate(source)
|
||||
|
||||
|
||||
_REGISTRY: List[RemovalStep] = []
|
||||
|
||||
|
||||
def register(step: RemovalStep) -> RemovalStep:
    """Add ``step`` to the end of the module registry and return it unchanged."""
    _REGISTRY.append(step)
    return step
|
||||
|
||||
|
||||
def find_removal_step(provider: str, source: str) -> Optional[RemovalStep]:
    """Look up the removal step responsible for ``(provider, source)``.

    The registry is scanned in registration order and the first step whose
    ``matches`` accepts the pair wins. ``None`` means nothing is registered
    for that source; the caller then falls back to its default path (no
    external cleanup, no suppression), which is the intended outcome for
    ``manual`` entries that only ever lived in the pool.
    """
    return next(
        (candidate for candidate in _REGISTRY if candidate.matches(provider, source)),
        None,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Individual RemovalStep implementations — one per source.
|
||||
# ---------------------------------------------------------------------------
|
||||
# Each remove_fn is intentionally small and single-purpose. Adding a new
|
||||
# credential source means adding ONE entry here — no other changes to
|
||||
# auth_remove_command.
|
||||
|
||||
|
||||
def _remove_env_source(provider: str, removed) -> RemovalResult:
    """Remove an ``env:<VAR>`` credential source.

    Steps, in order:
      1. Record whether the variable is set in this process and whether
         ``~/.hermes/.env`` contains a line starting with ``<VAR>=``.
      2. Ask ``remove_env_value`` to clear it, and report that under
         ``cleaned`` when it says something was cleared.
      3. If the variable was set in the process but no matching ``.env``
         line was found, add hints telling the user to unset it in their
         shell; otherwise add a single "suppressed" hint.

    Args:
        provider: Provider pool key, used only in the hint text.
        removed: The removed pool entry; its ``source`` attribute is read.

    Returns:
        A ``RemovalResult``; empty when the source has no variable name
        after the ``env:`` prefix.
    """
    from hermes_cli.config import get_env_path, remove_env_value

    outcome = RemovalResult()
    var_name = removed.source[len("env:"):]
    if not var_name:
        return outcome

    # Snapshot both locations BEFORE remove_env_value pops os.environ.
    present_in_process = bool(os.getenv(var_name))
    present_in_dotenv = False
    try:
        dotenv_path = get_env_path()
        if dotenv_path.exists():
            for raw_line in dotenv_path.read_text(errors="replace").splitlines():
                if raw_line.strip().startswith(f"{var_name}="):
                    present_in_dotenv = True
                    break
    except OSError:
        # Unreadable .env: treat the variable as not present there.
        pass

    if remove_env_value(var_name):
        outcome.cleaned.append(f"Cleared {var_name} from .env")

    if present_in_process and not present_in_dotenv:
        outcome.hints.extend([
            f"Note: {var_name} is still set in your shell environment "
            f"(not in ~/.hermes/.env).",
            " Unset it there (shell profile, systemd EnvironmentFile, "
            "launchd plist, etc.) or it will keep being visible to Hermes.",
            f" The pool entry is now suppressed — Hermes will ignore "
            f"{var_name} until you run `hermes auth add {provider}`.",
        ])
    else:
        outcome.hints.append(
            f"Suppressed env:{var_name} — it will not be re-seeded even "
            f"if the variable is re-exported later."
        )
    return outcome
|
||||
|
||||
|
||||
def _remove_claude_code(provider: str, removed) -> RemovalResult:
    """Handle removal of the ``claude_code`` source.

    ``~/.claude/.credentials.json`` belongs to Claude Code, so nothing is
    deleted here — the user's Claude Code install must keep working. The
    result carries only explanatory hints and leaves ``suppress`` at its
    default.
    """
    notes = [
        "Suppressed claude_code credential — it will not be re-seeded.",
        "Note: Claude Code credentials still live in ~/.claude/.credentials.json",
        "Run `hermes auth add anthropic` to re-enable if needed.",
    ]
    return RemovalResult(hints=notes)
|
||||
|
||||
|
||||
def _remove_hermes_pkce(provider: str, removed) -> RemovalResult:
    """Delete Hermes' own Anthropic OAuth file, ``.anthropic_oauth.json``.

    The file lives under the Hermes home directory and is owned by Hermes,
    so it is removed outright. A failed delete is reported as a hint rather
    than raised.
    """
    from hermes_constants import get_hermes_home

    outcome = RemovalResult()
    token_path = get_hermes_home() / ".anthropic_oauth.json"
    if not token_path.exists():
        return outcome

    try:
        token_path.unlink()
    except OSError as exc:
        outcome.hints.append(f"Could not delete {token_path}: {exc}")
    else:
        outcome.cleaned.append("Cleared Hermes Anthropic OAuth credentials")
    return outcome
|
||||
|
||||
|
||||
def _clear_auth_store_provider(provider: str) -> bool:
    """Drop ``providers[provider]`` from the auth store.

    The load / delete / save sequence runs while holding the auth-store lock.

    Returns:
        True when an entry existed and was deleted, False otherwise (the
        store is not rewritten in that case).
    """
    from hermes_cli.auth import (
        _auth_store_lock,
        _load_auth_store,
        _save_auth_store,
    )

    with _auth_store_lock():
        store = _load_auth_store()
        provider_blocks = store.get("providers")
        if not isinstance(provider_blocks, dict) or provider not in provider_blocks:
            return False
        del provider_blocks[provider]
        _save_auth_store(store)
        return True
|
||||
|
||||
|
||||
def _remove_nous_device_code(provider: str, removed) -> RemovalResult:
    """Clear the provider's OAuth block from the auth store.

    Nous OAuth tokens live in ``auth.json`` under ``providers.nous``. The
    returned result keeps ``suppress`` at its default, so the source is
    suppressed as well as cleared: otherwise a later `hermes login` could
    write ``providers.nous`` again before the user chose to re-add it.
    `hermes auth add nous` is the documented way back in.
    """
    outcome = RemovalResult()
    was_cleared = _clear_auth_store_provider(provider)
    if was_cleared:
        outcome.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
    return outcome
|
||||
|
||||
|
||||
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
    """Remove a Codex device-code credential.

    Codex tokens exist both in the Hermes auth store and in
    ``~/.codex/auth.json``. Clearing only the Hermes copy would let the next
    pool load re-import from the Codex CLI file, so this step clears the
    auth-store block and suppresses the source. The Codex CLI file itself is
    left untouched so that tool keeps working.

    Pool entries may carry the source ``"device_code"`` or
    ``"manual:device_code"``, but the re-seed gate is keyed on plain
    ``"device_code"``; that canonical key is suppressed here regardless of
    which variant was removed. Suppressing it more than once is harmless.
    """
    from hermes_cli.auth import suppress_credential_source

    outcome = RemovalResult()
    if _clear_auth_store_provider(provider):
        outcome.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")

    # Always suppress the canonical key so a `manual:device_code` removal
    # also blocks the `device_code` re-seed path.
    suppress_credential_source(provider, "device_code")

    notes = [
        "Suppressed openai-codex device_code source — it will not be re-seeded.",
        "Note: Codex CLI credentials still live in ~/.codex/auth.json",
        "Run `hermes auth add openai-codex` to re-enable if needed.",
    ]
    outcome.hints.extend(notes)
    return outcome
|
||||
|
||||
|
||||
def _remove_qwen_cli(provider: str, removed) -> RemovalResult:
    """Handle removal of the ``qwen-cli`` source.

    ``~/.qwen/oauth_creds.json`` is owned by the Qwen CLI, which still reads
    it, so the file is not deleted. Only hints are returned and ``suppress``
    stays at its default.
    """
    notes = [
        "Suppressed qwen-cli credential — it will not be re-seeded.",
        "Note: Qwen CLI credentials still live in ~/.qwen/oauth_creds.json",
        "Run `hermes auth add qwen-oauth` to re-enable if needed.",
    ]
    return RemovalResult(hints=notes)
|
||||
|
||||
|
||||
def _remove_copilot_gh(provider: str, removed) -> RemovalResult:
    """Suppress every known Copilot token source in one go.

    The same Copilot token can show up as several pool entries: ``gh_cli``
    plus ``env:<VAR>`` for COPILOT_GITHUB_TOKEN / GH_TOKEN / GITHUB_TOKEN.
    Suppressing only the removed one would let the others bring it back, so
    all four source ids are suppressed regardless of which entry was
    removed. The user's gh CLI and shell environment are not modified.
    """
    from hermes_cli.auth import suppress_credential_source

    # Suppression is idempotent, so it does not matter that the caller may
    # suppress removed.source again afterwards.
    copilot_sources = ["gh_cli"] + [
        f"env:{name}"
        for name in ("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN")
    ]
    for source_id in copilot_sources:
        suppress_credential_source(provider, source_id)

    notes = [
        "Suppressed all copilot token sources (gh_cli + env vars) — they will not be re-seeded.",
        "Note: Your gh CLI / shell environment is unchanged.",
        "Run `hermes auth add copilot` to re-enable if needed.",
    ]
    return RemovalResult(hints=notes)
|
||||
|
||||
|
||||
def _remove_custom_config(provider: str, removed) -> RemovalResult:
    """Handle removal of a config-seeded custom-provider credential.

    These credentials come from config.yaml (a ``custom_providers`` entry or
    ``model.api_key``). Editing that file from here is considered more
    invasive than suppression, so nothing on disk is changed; the hints tell
    the user the value is still there and how to remove it for good.
    """
    source_label = removed.source
    notes = [
        f"Suppressed {source_label} — it will not be re-seeded.",
        "Note: The underlying value in config.yaml is unchanged. Edit it "
        "directly if you want to remove the credential from disk.",
    ]
    return RemovalResult(hints=notes)
|
||||
|
||||
|
||||
def _register_all_sources() -> None:
    """Populate the registry; invoked once at import time.

    ORDER MATTERS: ``find_removal_step`` returns the first match, so the
    copilot step is listed ahead of the generic ``env:*`` step. That way a
    copilot ``env:GH_TOKEN`` entry is handled by the copilot removal (which
    leaves the user's shell alone) instead of the generic env-var removal
    (which would try to clear .env).
    """

    def _is_copilot_source(src: str) -> bool:
        return src == "gh_cli" or src.startswith("env:")

    def _is_env_source(src: str) -> bool:
        return src.startswith("env:")

    def _is_codex_device_code(src: str) -> bool:
        return src == "device_code" or src.endswith(":device_code")

    def _is_config_source(src: str) -> bool:
        return src.startswith("config:") or src == "model_config"

    ordered_steps = (
        RemovalStep(
            provider="copilot",
            source_id="gh_cli",
            match_fn=_is_copilot_source,
            remove_fn=_remove_copilot_gh,
            description="gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
        ),
        RemovalStep(
            provider="*",
            source_id="env:",
            match_fn=_is_env_source,
            remove_fn=_remove_env_source,
            description="Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
        ),
        RemovalStep(
            provider="anthropic",
            source_id="claude_code",
            remove_fn=_remove_claude_code,
            description="~/.claude/.credentials.json",
        ),
        RemovalStep(
            provider="anthropic",
            source_id="hermes_pkce",
            remove_fn=_remove_hermes_pkce,
            description="~/.hermes/.anthropic_oauth.json",
        ),
        RemovalStep(
            provider="nous",
            source_id="device_code",
            remove_fn=_remove_nous_device_code,
            description="auth.json providers.nous",
        ),
        RemovalStep(
            provider="openai-codex",
            source_id="device_code",
            match_fn=_is_codex_device_code,
            remove_fn=_remove_codex_device_code,
            description="auth.json providers.openai-codex + ~/.codex/auth.json",
        ),
        RemovalStep(
            provider="qwen-oauth",
            source_id="qwen-cli",
            remove_fn=_remove_qwen_cli,
            description="~/.qwen/oauth_creds.json",
        ),
        RemovalStep(
            provider="*",
            source_id="config:",
            match_fn=_is_config_source,
            remove_fn=_remove_custom_config,
            description="Custom provider config.yaml api_key field",
        ),
    )
    for step in ordered_steps:
        register(step)
|
||||
|
||||
|
||||
_register_all_sources()
|
||||
111
agent/file_safety.py
Normal file
111
agent/file_safety.py
Normal file
|
|
@ -0,0 +1,111 @@
|
|||
"""Shared file safety rules used by both tools and ACP shims."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
|
||||
def _hermes_home_path() -> Path:
    """Return the Hermes home directory.

    Delegates to ``hermes_constants.get_hermes_home`` (imported lazily to
    avoid import cycles). If that import or call fails for any reason, falls
    back to ``~/.hermes`` with the user directory expanded.
    """
    try:
        from hermes_constants import get_hermes_home  # lazy: avoids cycles

        location = get_hermes_home()
    except Exception:
        location = Path(os.path.expanduser("~/.hermes"))
    return location
|
||||
|
||||
|
||||
def build_write_denied_paths(home: str) -> set[str]:
    """Build the set of exact file paths that must never be written.

    Args:
        home: The user's home directory; the per-user entries are joined
            onto it.

    Returns:
        The ``os.path.realpath`` form of each protected path: selected SSH
        files, the ``.env`` inside the Hermes home, shell start-up files,
        credential dotfiles, and a few files under ``/etc``.
    """
    hermes_home = _hermes_home_path()

    ssh_files = ("authorized_keys", "id_rsa", "id_ed25519", "config")
    home_dotfiles = (
        ".bashrc",
        ".zshrc",
        ".profile",
        ".bash_profile",
        ".zprofile",
        ".netrc",
        ".pgpass",
        ".npmrc",
        ".pypirc",
    )

    candidates = [os.path.join(home, ".ssh", name) for name in ssh_files]
    candidates.append(str(hermes_home / ".env"))
    candidates.extend(os.path.join(home, name) for name in home_dotfiles)
    candidates.extend(["/etc/sudoers", "/etc/passwd", "/etc/shadow"])

    return {os.path.realpath(candidate) for candidate in candidates}
|
||||
|
||||
|
||||
def build_write_denied_prefixes(home: str) -> list[str]:
    """Build the list of directory prefixes that must never be written under.

    Args:
        home: The user's home directory; the per-user entries are joined
            onto it.

    Returns:
        One entry per protected directory, each the ``os.path.realpath`` of
        the directory followed by ``os.sep`` so it can be used directly with
        ``str.startswith``.
    """
    protected_dirs = (
        os.path.join(home, ".ssh"),
        os.path.join(home, ".aws"),
        os.path.join(home, ".gnupg"),
        os.path.join(home, ".kube"),
        "/etc/sudoers.d",
        "/etc/systemd",
        os.path.join(home, ".docker"),
        os.path.join(home, ".azure"),
        os.path.join(home, ".config", "gh"),
    )
    prefixes = []
    for directory in protected_dirs:
        prefixes.append(os.path.realpath(directory) + os.sep)
    return prefixes
|
||||
|
||||
|
||||
def get_safe_write_root() -> Optional[str]:
    """Return the resolved ``HERMES_WRITE_SAFE_ROOT`` directory, if configured.

    Returns:
        The environment variable's value with ``~`` expanded and passed
        through ``os.path.realpath``, or None when the variable is unset,
        empty, or cannot be resolved.
    """
    configured = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
    if configured:
        try:
            return os.path.realpath(os.path.expanduser(configured))
        except Exception:
            # Best-effort: an unresolvable value is treated as "not set".
            pass
    return None
|
||||
|
||||
|
||||
def is_write_denied(path: str) -> bool:
    """Decide whether writing to ``path`` is forbidden.

    The path is expanded and resolved, then rejected when any of these hold:
      * it equals one of the exact paths from ``build_write_denied_paths``;
      * it starts with one of the prefixes from
        ``build_write_denied_prefixes``;
      * a safe write root is configured and the path is neither that root
        nor located beneath it.

    Returns:
        True when the write must be blocked, False otherwise.
    """
    home = os.path.realpath(os.path.expanduser("~"))
    target = os.path.realpath(os.path.expanduser(str(path)))

    if target in build_write_denied_paths(home):
        return True
    if any(target.startswith(prefix) for prefix in build_write_denied_prefixes(home)):
        return True

    safe_root = get_safe_write_root()
    if not safe_root:
        return False
    inside_safe_root = target == safe_root or target.startswith(safe_root + os.sep)
    return not inside_safe_root
|
||||
|
||||
|
||||
def get_read_block_error(path: str) -> Optional[str]:
    """Return a refusal message if ``path`` is inside the skills hub cache.

    The path is expanded and resolved, then tested against
    ``<hermes home>/skills/.hub/index-cache`` and ``<hermes home>/skills/.hub``.

    Returns:
        An "Access denied" message when the path lies within either
        directory, otherwise None.
    """
    target = Path(path).expanduser().resolve()
    hub_root = _hermes_home_path().resolve() / "skills" / ".hub"

    for protected in (hub_root / "index-cache", hub_root):
        try:
            # relative_to raises ValueError when target is outside `protected`.
            target.relative_to(protected)
        except ValueError:
            continue
        return (
            f"Access denied: {path} is an internal Hermes cache file "
            "and cannot be read directly to prevent prompt injection. "
            "Use the skills_list or skill_view tools instead."
        )
    return None
|
||||
|
|
@ -170,6 +170,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"Qwen/Qwen3.5-35B-A3B": 131072,
|
||||
"deepseek-ai/DeepSeek-V3.2": 65536,
|
||||
"moonshotai/Kimi-K2.5": 262144,
|
||||
"moonshotai/Kimi-K2.6": 262144,
|
||||
"moonshotai/Kimi-K2-Thinking": 262144,
|
||||
"MiniMaxAI/MiniMax-M2.5": 204800,
|
||||
"XiaomiMiMo/MiMo-V2-Flash": 256000,
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ can invoke skills via /skill-name commands and prompt-only built-ins like
|
|||
import json
|
||||
import logging
|
||||
import re
|
||||
import subprocess
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional
|
||||
|
|
@ -22,6 +23,110 @@ _PLAN_SLUG_RE = re.compile(r"[^a-z0-9]+")
|
|||
_SKILL_INVALID_CHARS = re.compile(r"[^a-z0-9-]")
|
||||
_SKILL_MULTI_HYPHEN = re.compile(r"-{2,}")
|
||||
|
||||
# Matches ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} tokens in SKILL.md.
|
||||
# Tokens that don't resolve (e.g. ${HERMES_SESSION_ID} with no session) are
|
||||
# left as-is so the user can debug them.
|
||||
_SKILL_TEMPLATE_RE = re.compile(r"\$\{(HERMES_SKILL_DIR|HERMES_SESSION_ID)\}")
|
||||
|
||||
# Matches inline shell snippets like: !`date +%Y-%m-%d`
|
||||
# Non-greedy, single-line only — no newlines inside the backticks.
|
||||
_INLINE_SHELL_RE = re.compile(r"!`([^`\n]+)`")
|
||||
|
||||
# Cap inline-shell output so a runaway command can't blow out the context.
|
||||
_INLINE_SHELL_MAX_OUTPUT = 4000
|
||||
|
||||
|
||||
def _load_skills_config() -> dict:
    """Return the ``skills`` mapping from config.yaml, or ``{}``.

    Best-effort: any failure while importing or loading the config is logged
    at debug level and yields an empty dict, as does a ``skills`` value that
    is not a dict.
    """
    try:
        from hermes_cli.config import load_config

        section = (load_config() or {}).get("skills")
    except Exception:
        logger.debug("Could not read skills config", exc_info=True)
        return {}
    return section if isinstance(section, dict) else {}
|
||||
|
||||
|
||||
def _substitute_template_vars(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
session_id: str | None,
|
||||
) -> str:
|
||||
"""Replace ${HERMES_SKILL_DIR} / ${HERMES_SESSION_ID} in skill content.
|
||||
|
||||
Only substitutes tokens for which a concrete value is available —
|
||||
unresolved tokens are left in place so the author can spot them.
|
||||
"""
|
||||
if not content:
|
||||
return content
|
||||
|
||||
skill_dir_str = str(skill_dir) if skill_dir else None
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
token = match.group(1)
|
||||
if token == "HERMES_SKILL_DIR" and skill_dir_str:
|
||||
return skill_dir_str
|
||||
if token == "HERMES_SESSION_ID" and session_id:
|
||||
return str(session_id)
|
||||
return match.group(0)
|
||||
|
||||
return _SKILL_TEMPLATE_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def _run_inline_shell(command: str, cwd: Path | None, timeout: int) -> str:
|
||||
"""Execute a single inline-shell snippet and return its stdout (trimmed).
|
||||
|
||||
Failures return a short ``[inline-shell error: ...]`` marker instead of
|
||||
raising, so one bad snippet can't wreck the whole skill message.
|
||||
"""
|
||||
try:
|
||||
completed = subprocess.run(
|
||||
["bash", "-c", command],
|
||||
cwd=str(cwd) if cwd else None,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=max(1, int(timeout)),
|
||||
check=False,
|
||||
)
|
||||
except subprocess.TimeoutExpired:
|
||||
return f"[inline-shell timeout after {timeout}s: {command}]"
|
||||
except FileNotFoundError:
|
||||
return f"[inline-shell error: bash not found]"
|
||||
except Exception as exc:
|
||||
return f"[inline-shell error: {exc}]"
|
||||
|
||||
output = (completed.stdout or "").rstrip("\n")
|
||||
if not output and completed.stderr:
|
||||
output = completed.stderr.rstrip("\n")
|
||||
if len(output) > _INLINE_SHELL_MAX_OUTPUT:
|
||||
output = output[:_INLINE_SHELL_MAX_OUTPUT] + "…[truncated]"
|
||||
return output
|
||||
|
||||
|
||||
def _expand_inline_shell(
|
||||
content: str,
|
||||
skill_dir: Path | None,
|
||||
timeout: int,
|
||||
) -> str:
|
||||
"""Replace every !`cmd` snippet in ``content`` with its stdout.
|
||||
|
||||
Runs each snippet with the skill directory as CWD so relative paths in
|
||||
the snippet work the way the author expects.
|
||||
"""
|
||||
if "!`" not in content:
|
||||
return content
|
||||
|
||||
def _replace(match: re.Match) -> str:
|
||||
cmd = match.group(1).strip()
|
||||
if not cmd:
|
||||
return ""
|
||||
return _run_inline_shell(cmd, skill_dir, timeout)
|
||||
|
||||
return _INLINE_SHELL_RE.sub(_replace, content)
|
||||
|
||||
|
||||
def build_plan_path(
|
||||
user_instruction: str = "",
|
||||
|
|
@ -133,14 +238,36 @@ def _build_skill_message(
|
|||
activation_note: str,
|
||||
user_instruction: str = "",
|
||||
runtime_note: str = "",
|
||||
session_id: str | None = None,
|
||||
) -> str:
|
||||
"""Format a loaded skill into a user/system message payload."""
|
||||
from tools.skills_tool import SKILLS_DIR
|
||||
|
||||
content = str(loaded_skill.get("content") or "")
|
||||
|
||||
# ── Template substitution and inline-shell expansion ──
|
||||
# Done before anything else so downstream blocks (setup notes,
|
||||
# supporting-file hints) see the expanded content.
|
||||
skills_cfg = _load_skills_config()
|
||||
if skills_cfg.get("template_vars", True):
|
||||
content = _substitute_template_vars(content, skill_dir, session_id)
|
||||
if skills_cfg.get("inline_shell", False):
|
||||
timeout = int(skills_cfg.get("inline_shell_timeout", 10) or 10)
|
||||
content = _expand_inline_shell(content, skill_dir, timeout)
|
||||
|
||||
parts = [activation_note, "", content.strip()]
|
||||
|
||||
# ── Inject the absolute skill directory so the agent can reference
|
||||
# bundled scripts without an extra skill_view() round-trip. ──
|
||||
if skill_dir:
|
||||
parts.append("")
|
||||
parts.append(f"[Skill directory: {skill_dir}]")
|
||||
parts.append(
|
||||
"Resolve any relative paths in this skill (e.g. `scripts/foo.js`, "
|
||||
"`templates/config.yaml`) against that directory, then run them "
|
||||
"with the terminal tool using the absolute path."
|
||||
)
|
||||
|
||||
# ── Inject resolved skill config values ──
|
||||
_inject_skill_config(loaded_skill, parts)
|
||||
|
||||
|
|
@ -188,11 +315,13 @@ def _build_skill_message(
|
|||
# Skill is from an external dir — use the skill name instead
|
||||
skill_view_target = skill_dir.name
|
||||
parts.append("")
|
||||
parts.append("[This skill has supporting files you can load with the skill_view tool:]")
|
||||
parts.append("[This skill has supporting files:]")
|
||||
for sf in supporting:
|
||||
parts.append(f"- {sf}")
|
||||
parts.append(f"- {sf} -> {skill_dir / sf}")
|
||||
parts.append(
|
||||
f'\nTo view any of these, use: skill_view(name="{skill_view_target}", file_path="<path>")'
|
||||
f'\nLoad any of these with skill_view(name="{skill_view_target}", '
|
||||
f'file_path="<path>"), or run scripts directly by absolute path '
|
||||
f"(e.g. `node {skill_dir}/scripts/foo.js`)."
|
||||
)
|
||||
|
||||
if user_instruction:
|
||||
|
|
@ -332,6 +461,7 @@ def build_skill_invocation_message(
|
|||
activation_note,
|
||||
user_instruction=user_instruction,
|
||||
runtime_note=runtime_note,
|
||||
session_id=task_id,
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -370,6 +500,7 @@ def build_preloaded_skills_prompt(
|
|||
loaded_skill,
|
||||
skill_dir,
|
||||
activation_note,
|
||||
session_id=task_id,
|
||||
)
|
||||
)
|
||||
loaded_names.append(skill_name)
|
||||
|
|
|
|||
39
agent/transports/__init__.py
Normal file
39
agent/transports/__init__.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
"""Transport layer types and registry for provider response normalization.
|
||||
|
||||
Usage:
|
||||
from agent.transports import get_transport
|
||||
transport = get_transport("anthropic_messages")
|
||||
result = transport.normalize_response(raw_response)
|
||||
"""
|
||||
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
|
||||
|
||||
# Maps an api_mode string (e.g. "anthropic_messages") to its transport class.
_REGISTRY: dict = {}


def register_transport(api_mode: str, transport_cls: type) -> None:
    """Register ``transport_cls`` as the transport for ``api_mode``.

    Registering the same ``api_mode`` again replaces the previous class.
    """
    _REGISTRY[api_mode] = transport_cls


def get_transport(api_mode: str):
    """Return a new transport instance for ``api_mode``, or ``None``.

    ``None`` means no transport is registered for this api_mode, which lets
    call sites fall back to the legacy code path during gradual migration.

    Built-in transports are discovered lazily on a registry miss.  Discovery
    is keyed on the miss rather than on the registry being empty, so a
    transport registered explicitly before the first lookup cannot prevent
    the built-in transports from ever being imported.
    """
    cls = _REGISTRY.get(api_mode)
    if cls is None:
        # Re-running the import statements is cheap once the modules are
        # loaded; the import system returns the cached module objects.
        _discover_transports()
        cls = _REGISTRY.get(api_mode)
    if cls is None:
        return None
    return cls()


def _discover_transports() -> None:
    """Import the built-in transport modules so they register themselves.

    Discovery is best-effort: a transport module that cannot be imported is
    skipped so callers can still fall back to the legacy path, but the
    failure is logged at debug level instead of being silently discarded.
    """
    try:
        import agent.transports.anthropic  # noqa: F401
    except ImportError as exc:
        import logging

        logging.getLogger(__name__).debug(
            "Transport module agent.transports.anthropic unavailable: %s", exc
        )
|
||||
129
agent/transports/anthropic.py
Normal file
129
agent/transports/anthropic.py
Normal file
|
|
@ -0,0 +1,129 @@
|
|||
"""Anthropic Messages API transport.
|
||||
|
||||
Delegates to the existing adapter functions in agent/anthropic_adapter.py.
|
||||
This transport owns format conversion and normalization — NOT client lifecycle.
|
||||
"""
|
||||
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse
|
||||
|
||||
|
||||
class AnthropicTransport(ProviderTransport):
    """Provider transport for ``api_mode='anthropic_messages'``.

    Each conversion method forwards to the matching function in
    ``agent/anthropic_adapter.py``, imported lazily inside the method.
    No conversion logic lives here.
    """

    # Anthropic ``stop_reason`` -> OpenAI-style ``finish_reason``.
    # Kept as a class attribute so every instance shares one mapping.
    _STOP_REASON_MAP = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
        "refusal": "content_filter",
        "model_context_window_exceeded": "length",
    }

    @property
    def api_mode(self) -> str:
        """The api_mode string handled by this transport."""
        return "anthropic_messages"

    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
        """Convert OpenAI-format messages via ``convert_messages_to_anthropic``.

        kwargs:
            base_url: Optional[str], forwarded to the adapter (``None`` when
                not supplied).
        """
        from agent.anthropic_adapter import convert_messages_to_anthropic

        return convert_messages_to_anthropic(messages, base_url=kwargs.get("base_url"))

    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Convert OpenAI tool schemas via ``convert_tools_to_anthropic``."""
        from agent.anthropic_adapter import convert_tools_to_anthropic

        return convert_tools_to_anthropic(tools)

    def build_kwargs(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
    ) -> Dict[str, Any]:
        """Build the kwargs dict via ``build_anthropic_kwargs``.

        Recognised ``params`` and the fallback used when one is absent:
            max_tokens (16384), reasoning_config (None), tool_choice (None),
            is_oauth (False), preserve_dots (False), context_length (None),
            base_url (None), fast_mode (False).
        Any other key in ``params`` is ignored.
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

        fallbacks = {
            "max_tokens": 16384,
            "reasoning_config": None,
            "tool_choice": None,
            "is_oauth": False,
            "preserve_dots": False,
            "context_length": None,
            "base_url": None,
            "fast_mode": False,
        }
        # Only the recognised options are forwarded, each with its fallback.
        options = {key: params.get(key, default) for key, default in fallbacks.items()}
        return build_anthropic_kwargs(
            model=model,
            messages=messages,
            tools=tools,
            **options,
        )

    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Normalize a raw response via ``normalize_anthropic_response_v2``.

        kwargs:
            strip_tool_prefix: bool (default False), forwarded to the adapter,
                which uses it to strip 'mcp_mcp_' prefixes from tool names.
        """
        from agent.anthropic_adapter import normalize_anthropic_response_v2

        return normalize_anthropic_response_v2(
            response,
            strip_tool_prefix=kwargs.get("strip_tool_prefix", False),
        )

    def validate_response(self, response: Any) -> bool:
        """Return True only when ``response.content`` is a non-empty list."""
        if response is None:
            return False
        blocks = getattr(response, "content", None)
        return isinstance(blocks, list) and bool(blocks)

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
        """Return cache read/creation token counts, or None when both are zero.

        Also returns None when the response has no ``usage`` attribute.
        """
        usage = getattr(response, "usage", None)
        if usage is None:
            return None
        read_tokens = getattr(usage, "cache_read_input_tokens", 0) or 0
        created_tokens = getattr(usage, "cache_creation_input_tokens", 0) or 0
        if not (read_tokens or created_tokens):
            return None
        return {"cached_tokens": read_tokens, "creation_tokens": created_tokens}

    def map_finish_reason(self, raw_reason: str) -> str:
        """Map an Anthropic stop_reason to an OpenAI finish_reason.

        Reasons missing from ``_STOP_REASON_MAP`` map to ``"stop"``.
        """
        return self._STOP_REASON_MAP.get(raw_reason, "stop")
|
||||
|
||||
|
||||
# Auto-register on import
|
||||
from agent.transports import register_transport # noqa: E402
|
||||
|
||||
register_transport("anthropic_messages", AnthropicTransport)
|
||||
89
agent/transports/base.py
Normal file
89
agent/transports/base.py
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
"""Abstract base for provider transports.
|
||||
|
||||
A transport owns the data path for one api_mode:
|
||||
convert_messages → convert_tools → build_kwargs → normalize_response
|
||||
|
||||
It does NOT own: client construction, streaming, credential refresh,
|
||||
prompt caching, interrupt handling, or retry logic. Those stay on AIAgent.
|
||||
"""
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from agent.transports.types import NormalizedResponse
|
||||
|
||||
|
||||
class ProviderTransport(ABC):
    """Abstract interface for one api_mode's format conversion and normalization.

    Subclasses must implement ``api_mode``, ``convert_messages``,
    ``convert_tools``, ``build_kwargs`` and ``normalize_response``.  The
    remaining methods are optional hooks with pass-through defaults.
    """

    @property
    @abstractmethod
    def api_mode(self) -> str:
        """Return the api_mode string this transport handles.

        Example: ``'anthropic_messages'``.
        """

    @abstractmethod
    def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
        """Translate OpenAI-format messages into the provider's native shape.

        The return value is provider-specific, e.g. a ``(system, messages)``
        pair for Anthropic or the unchanged list for chat_completions.
        """

    @abstractmethod
    def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
        """Translate OpenAI-format tool definitions into the provider's format.

        The return value is a provider-specific tool list (e.g. Anthropic's
        input_schema format).
        """

    @abstractmethod
    def build_kwargs(
        self,
        model: str,
        messages: List[Dict[str, Any]],
        tools: Optional[List[Dict[str, Any]]] = None,
        **params,
    ) -> Dict[str, Any]:
        """Assemble the full kwargs dict for the provider's SDK call.

        This is the main entry point.  Implementations typically call
        ``convert_messages()`` and ``convert_tools()`` themselves and then
        layer model-specific configuration on top.
        """

    @abstractmethod
    def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
        """Convert a raw provider response into a ``NormalizedResponse``.

        This is the only method that returns a transport-layer type.
        """

    def validate_response(self, response: Any) -> bool:
        """Optional hook: report whether the raw response is structurally valid.

        The base implementation accepts every response and returns True.
        """
        return True

    def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
        """Optional hook: return provider cache hit/creation statistics.

        Overrides return a dict with ``'cached_tokens'`` and
        ``'creation_tokens'``; the base implementation returns None.
        """
        return None

    def map_finish_reason(self, raw_reason: str) -> str:
        """Optional hook: translate a provider stop reason to the OpenAI one.

        The base implementation returns ``raw_reason`` unchanged.  Override
        for providers with a different stop-reason vocabulary.
        """
        return raw_reason
|
||||
100
agent/transports/types.py
Normal file
100
agent/transports/types.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"""Shared types for normalized provider responses.
|
||||
|
||||
These dataclasses define the canonical shape that all provider adapters
|
||||
normalize responses to. The shared surface is intentionally minimal —
|
||||
only fields that every downstream consumer reads are top-level.
|
||||
Protocol-specific state goes in ``provider_data`` dicts (response-level
|
||||
and per-tool-call) so that protocol-aware code paths can access it
|
||||
without polluting the shared type.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
|
||||
@dataclass
class ToolCall:
    """One tool invocation, normalized across providers.

    ``id`` is the protocol's canonical identifier, the value used as
    ``tool_call_id`` / ``tool_use_id`` when building tool result messages.
    It may be ``None`` when the provider omits it; the agent then fills it
    via ``_deterministic_call_id()`` before storing the call in history.

    ``provider_data`` holds per-call protocol metadata that only
    protocol-aware code reads, for example:

    * Codex: ``{"call_id": "call_XXX", "response_item_id": "fc_XXX"}``
    * Gemini: ``{"extra_content": {"google": {"thought_signature": "..."}}}``
    * Others: ``None``
    """

    # Canonical call identifier; None when the provider did not supply one.
    id: Optional[str]
    # Name of the tool being called.
    name: str
    arguments: str  # JSON string
    # Excluded from repr() so provider metadata does not clutter debug output.
    provider_data: Optional[Dict[str, Any]] = field(repr=False, default=None)
|
||||
|
||||
|
||||
@dataclass
class Usage:
    """Token counts reported by an API response; every count defaults to 0."""

    # Tokens in the prompt / input.
    prompt_tokens: int = 0
    # Tokens generated in the completion / output.
    completion_tokens: int = 0
    # Total token count for the call.
    total_tokens: int = 0
    # Tokens counted as cached.
    cached_tokens: int = 0
|
||||
|
||||
|
||||
@dataclass
class NormalizedResponse:
    """A provider response reduced to the fields every caller can rely on.

    The top-level fields are cross-provider, so callers never need to branch
    on api_mode to read them.  Anything protocol-specific is carried in
    ``provider_data`` and read only by protocol-aware code paths.

    Response-level ``provider_data`` examples:

    * Anthropic: ``{"reasoning_details": [...]}``
    * Codex: ``{"codex_reasoning_items": [...]}``
    * Others: ``None``
    """

    # Text content of the response, if any.
    content: Optional[str]
    # Normalized tool calls, if any.
    tool_calls: Optional[List[ToolCall]]
    finish_reason: str  # "stop", "tool_calls", "length", "content_filter"
    # Reasoning text, when available.
    reasoning: Optional[str] = None
    # Token usage, when available.
    usage: Optional[Usage] = None
    # Excluded from repr() so provider metadata does not clutter debug output.
    provider_data: Optional[Dict[str, Any]] = field(repr=False, default=None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Factory helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_tool_call(
    id: Optional[str],
    name: str,
    arguments: Any,
    **provider_fields: Any,
) -> ToolCall:
    """Build a ``ToolCall``, serialising *arguments* to a JSON string.

    ``ToolCall.arguments`` is a JSON string, so *arguments* is normalised:

    * ``str``: used as-is (assumed to be JSON already).
    * ``dict`` / ``list`` / ``tuple``: encoded with ``json.dumps``.
    * ``None``: becomes ``"{}"`` (a call with no arguments).
    * anything else: falls back to ``str(arguments)``.

    Any extra keyword arguments are collected into ``provider_data``;
    ``provider_data`` is ``None`` when there are none.

    Raises:
        TypeError: if a container holds values ``json.dumps`` cannot encode.
    """
    if isinstance(arguments, str):
        args_str = arguments
    elif arguments is None:
        # str(None) would give "None", which is not valid JSON.
        args_str = "{}"
    elif isinstance(arguments, (dict, list, tuple)):
        # str() on a container gives a Python repr (single quotes), not JSON.
        args_str = json.dumps(arguments)
    else:
        args_str = str(arguments)
    pd = dict(provider_fields) if provider_fields else None
    return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
|
||||
|
||||
|
||||
def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
    """Look up a provider stop reason in *mapping*.

    Returns ``"stop"`` when *reason* is ``None`` or has no entry in *mapping*.
    """
    # None is handled before the lookup so it never hits the mapping.
    return "stop" if reason is None else mapping.get(reason, "stop")
|
||||
214
cli.py
214
cli.py
|
|
@ -19,6 +19,7 @@ import shutil
|
|||
import sys
|
||||
import json
|
||||
import re
|
||||
import concurrent.futures
|
||||
import base64
|
||||
import atexit
|
||||
import tempfile
|
||||
|
|
@ -65,6 +66,7 @@ from agent.usage_pricing import (
|
|||
format_duration_compact,
|
||||
format_token_count_compact,
|
||||
)
|
||||
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
from hermes_cli.banner import _format_context_length, format_banner_version_label
|
||||
|
||||
_COMMAND_SPINNER_FRAMES = ("⠋", "⠙", "⠹", "⠸", "⠼", "⠴", "⠦", "⠧", "⠇", "⠏")
|
||||
|
|
@ -5271,6 +5273,30 @@ class HermesCLI:
|
|||
except Exception:
|
||||
return False
|
||||
|
||||
def _should_handle_steer_command_inline(self, text: str, has_images: bool = False) -> bool:
|
||||
"""Return True when /steer should be dispatched immediately while the agent is running.
|
||||
|
||||
/steer MUST bypass the normal _pending_input → process_loop path when
|
||||
the agent is active, because process_loop is blocked inside
|
||||
self.chat() for the duration of the run. By the time the queued
|
||||
command is pulled from _pending_input, _agent_running has already
|
||||
flipped back to False, and process_command() takes the idle
|
||||
fallback — delivering the steer as a next-turn message instead of
|
||||
injecting it mid-run. Dispatching inline on the UI thread calls
|
||||
agent.steer() directly, which is thread-safe (uses _pending_steer_lock).
|
||||
"""
|
||||
if not text or has_images or not _looks_like_slash_command(text):
|
||||
return False
|
||||
if not getattr(self, "_agent_running", False):
|
||||
return False
|
||||
try:
|
||||
from hermes_cli.commands import resolve_command
|
||||
base = text.split(None, 1)[0].lower().lstrip('/')
|
||||
cmd = resolve_command(base)
|
||||
return bool(cmd and cmd.name == "steer")
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _show_model_and_providers(self):
|
||||
"""Show current model + provider and list all authenticated providers.
|
||||
|
||||
|
|
@ -7022,6 +7048,27 @@ class HermesCLI:
|
|||
if cost_result.status == "unknown":
|
||||
print(f" Note: Pricing unknown for {agent.model}")
|
||||
|
||||
# Account limits -- fetched off-thread with a hard timeout so slow
|
||||
# provider APIs don't hang the prompt.
|
||||
provider = getattr(agent, "provider", None) or getattr(self, "provider", None)
|
||||
base_url = getattr(agent, "base_url", None) or getattr(self, "base_url", None)
|
||||
api_key = getattr(agent, "api_key", None) or getattr(self, "api_key", None)
|
||||
account_snapshot = None
|
||||
if provider:
|
||||
with concurrent.futures.ThreadPoolExecutor(max_workers=1) as _pool:
|
||||
try:
|
||||
account_snapshot = _pool.submit(
|
||||
fetch_account_usage, provider,
|
||||
base_url=base_url, api_key=api_key,
|
||||
).result(timeout=10.0)
|
||||
except (concurrent.futures.TimeoutError, Exception):
|
||||
account_snapshot = None
|
||||
account_lines = [f" {line}" for line in render_account_usage_lines(account_snapshot)]
|
||||
if account_lines:
|
||||
print()
|
||||
for line in account_lines:
|
||||
print(line)
|
||||
|
||||
if self.verbose:
|
||||
logging.getLogger().setLevel(logging.DEBUG)
|
||||
for noisy in ('openai', 'openai._base_client', 'httpx', 'httpcore', 'asyncio', 'hpack', 'grpc', 'modal'):
|
||||
|
|
@ -7398,11 +7445,12 @@ class HermesCLI:
|
|||
self._voice_stop_and_transcribe()
|
||||
|
||||
# Audio cue: single beep BEFORE starting stream (avoid CoreAudio conflict)
|
||||
try:
|
||||
from tools.voice_mode import play_beep
|
||||
play_beep(frequency=880, count=1)
|
||||
except Exception:
|
||||
pass
|
||||
if self._voice_beeps_enabled():
|
||||
try:
|
||||
from tools.voice_mode import play_beep
|
||||
play_beep(frequency=880, count=1)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
self._voice_recorder.start(on_silence_stop=_on_silence)
|
||||
|
|
@ -7450,11 +7498,12 @@ class HermesCLI:
|
|||
wav_path = self._voice_recorder.stop()
|
||||
|
||||
# Audio cue: double beep after stream stopped (no CoreAudio conflict)
|
||||
try:
|
||||
from tools.voice_mode import play_beep
|
||||
play_beep(frequency=660, count=2)
|
||||
except Exception:
|
||||
pass
|
||||
if self._voice_beeps_enabled():
|
||||
try:
|
||||
from tools.voice_mode import play_beep
|
||||
play_beep(frequency=660, count=2)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if wav_path is None:
|
||||
_cprint(f"{_DIM}No speech detected.{_RST}")
|
||||
|
|
@ -7604,6 +7653,17 @@ class HermesCLI:
|
|||
_cprint(f"Unknown voice subcommand: {subcommand}")
|
||||
_cprint("Usage: /voice [on|off|tts|status]")
|
||||
|
||||
def _voice_beeps_enabled(self) -> bool:
|
||||
"""Return whether CLI voice mode should play record start/stop beeps."""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
voice_cfg = load_config().get("voice", {})
|
||||
if isinstance(voice_cfg, dict):
|
||||
return bool(voice_cfg.get("beep_enabled", True))
|
||||
except Exception:
|
||||
pass
|
||||
return True
|
||||
|
||||
def _enable_voice_mode(self):
|
||||
"""Enable voice mode after checking requirements."""
|
||||
if self._voice_mode:
|
||||
|
|
@ -8007,8 +8067,18 @@ class HermesCLI:
|
|||
choice_wrapped: list[tuple[int, str]] = []
|
||||
for i, choice in enumerate(choices):
|
||||
label = choice_labels.get(choice, choice)
|
||||
prefix = '❯ ' if i == selected else ' '
|
||||
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
|
||||
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
|
||||
if i < 9:
|
||||
num_prefix = str(i + 1)
|
||||
elif i == 9:
|
||||
num_prefix = '0'
|
||||
else:
|
||||
num_prefix = ' ' # No number for items beyond 10th
|
||||
if i == selected:
|
||||
prefix = f'❯ {num_prefix}. '
|
||||
else:
|
||||
prefix = f' {num_prefix}. '
|
||||
for wrapped in _wrap_panel_text(f"{prefix}{label}", inner_text_width, subsequent_indent=" "):
|
||||
choice_wrapped.append((i, wrapped))
|
||||
|
||||
# Budget vertical space so HSplit never clips the command or choices.
|
||||
|
|
@ -9075,6 +9145,17 @@ class HermesCLI:
|
|||
event.app.current_buffer.reset(append_to_history=True)
|
||||
return
|
||||
|
||||
# Handle /steer while the agent is running immediately on the
|
||||
# UI thread. Queuing through _pending_input would deadlock the
|
||||
# steer until after the agent loop finishes (process_loop is
|
||||
# blocked inside self.chat()), which turns /steer into a
|
||||
# post-run next-turn message — defeating mid-run injection.
|
||||
# agent.steer() is thread-safe (holds _pending_steer_lock).
|
||||
if self._should_handle_steer_command_inline(text, has_images=has_images):
|
||||
self.process_command(text)
|
||||
event.app.current_buffer.reset(append_to_history=True)
|
||||
return
|
||||
|
||||
# Snapshot and clear attached images
|
||||
images = list(self._attached_images)
|
||||
self._attached_images.clear()
|
||||
|
|
@ -9172,6 +9253,29 @@ class HermesCLI:
|
|||
self._clarify_state["selected"] = min(max_idx, self._clarify_state["selected"] + 1)
|
||||
event.app.invalidate()
|
||||
|
||||
# Number keys for quick clarify selection (1-9, 0 for 10th item)
|
||||
def _make_clarify_number_handler(idx):
|
||||
def handler(event):
|
||||
if self._clarify_state and not self._clarify_freetext:
|
||||
choices = self._clarify_state.get("choices") or []
|
||||
# Map index to choice (treating "Other" as the last option)
|
||||
if idx < len(choices):
|
||||
# Select a numbered choice
|
||||
self._clarify_state["response_queue"].put(choices[idx])
|
||||
self._clarify_state = None
|
||||
self._clarify_freetext = False
|
||||
event.app.invalidate()
|
||||
elif idx == len(choices):
|
||||
# Select "Other" option
|
||||
self._clarify_freetext = True
|
||||
event.app.invalidate()
|
||||
return handler
|
||||
|
||||
for _num in range(10):
|
||||
# 1-9 select items 0-8, 0 selects item 9 (10th item)
|
||||
_idx = 9 if _num == 0 else _num - 1
|
||||
kb.add(str(_num), filter=Condition(lambda: bool(self._clarify_state) and not self._clarify_freetext))(_make_clarify_number_handler(_idx))
|
||||
|
||||
# --- Dangerous command approval: arrow-key navigation ---
|
||||
|
||||
@kb.add('up', filter=Condition(lambda: bool(self._approval_state)))
|
||||
|
|
@ -9213,6 +9317,20 @@ class HermesCLI:
|
|||
event.app.current_buffer.reset()
|
||||
event.app.invalidate()
|
||||
|
||||
# Number keys for quick approval selection (1-9, 0 for 10th item)
|
||||
def _make_approval_number_handler(idx):
|
||||
def handler(event):
|
||||
if self._approval_state and idx < len(self._approval_state["choices"]):
|
||||
self._approval_state["selected"] = idx
|
||||
self._handle_approval_selection()
|
||||
event.app.invalidate()
|
||||
return handler
|
||||
|
||||
for _num in range(10):
|
||||
# 1-9 select items 0-8, 0 selects item 9 (10th item)
|
||||
_idx = 9 if _num == 0 else _num - 1
|
||||
kb.add(str(_num), filter=Condition(lambda: bool(self._approval_state)))(_make_approval_number_handler(_idx))
|
||||
|
||||
# --- History navigation: up/down browse history in normal input mode ---
|
||||
# The TextArea is multiline, so by default up/down only move the cursor.
|
||||
# Buffer.auto_up/auto_down handle both: cursor movement when multi-line,
|
||||
|
|
@ -9781,14 +9899,32 @@ class HermesCLI:
|
|||
selected = state.get("selected", 0)
|
||||
preview_lines = _wrap_panel_text(question, 60)
|
||||
for i, choice in enumerate(choices):
|
||||
prefix = "❯ " if i == selected and not cli_ref._clarify_freetext else " "
|
||||
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" "))
|
||||
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
|
||||
if i < 9:
|
||||
num_prefix = str(i + 1)
|
||||
elif i == 9:
|
||||
num_prefix = '0'
|
||||
else:
|
||||
num_prefix = ' '
|
||||
if i == selected and not cli_ref._clarify_freetext:
|
||||
prefix = f"❯ {num_prefix}. "
|
||||
else:
|
||||
prefix = f" {num_prefix}. "
|
||||
preview_lines.extend(_wrap_panel_text(f"{prefix}{choice}", 60, subsequent_indent=" "))
|
||||
# "Other" option in preview
|
||||
other_num = len(choices) + 1
|
||||
if other_num < 10:
|
||||
other_num_prefix = str(other_num)
|
||||
elif other_num == 10:
|
||||
other_num_prefix = '0'
|
||||
else:
|
||||
other_num_prefix = ' '
|
||||
other_label = (
|
||||
"❯ Other (type below)" if cli_ref._clarify_freetext
|
||||
else "❯ Other (type your answer)" if selected == len(choices)
|
||||
else " Other (type your answer)"
|
||||
f"❯ {other_num_prefix}. Other (type below)" if cli_ref._clarify_freetext
|
||||
else f"❯ {other_num_prefix}. Other (type your answer)" if selected == len(choices)
|
||||
else f" {other_num_prefix}. Other (type your answer)"
|
||||
)
|
||||
preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" "))
|
||||
preview_lines.extend(_wrap_panel_text(other_label, 60, subsequent_indent=" "))
|
||||
box_width = _panel_box_width("Hermes needs your input", preview_lines)
|
||||
inner_text_width = max(8, box_width - 2)
|
||||
|
||||
|
|
@ -9796,18 +9932,35 @@ class HermesCLI:
|
|||
choice_wrapped: list[tuple[int, str]] = []
|
||||
if choices:
|
||||
for i, choice in enumerate(choices):
|
||||
prefix = '❯ ' if i == selected and not cli_ref._clarify_freetext else ' '
|
||||
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
|
||||
# Show number prefix for quick selection (1-9 for items 1-9, 0 for 10th item)
|
||||
if i < 9:
|
||||
num_prefix = str(i + 1)
|
||||
elif i == 9:
|
||||
num_prefix = '0'
|
||||
else:
|
||||
num_prefix = ' '
|
||||
if i == selected and not cli_ref._clarify_freetext:
|
||||
prefix = f'❯ {num_prefix}. '
|
||||
else:
|
||||
prefix = f' {num_prefix}. '
|
||||
for wrapped in _wrap_panel_text(f"{prefix}{choice}", inner_text_width, subsequent_indent=" "):
|
||||
choice_wrapped.append((i, wrapped))
|
||||
# Trailing Other row(s)
|
||||
other_idx = len(choices)
|
||||
if selected == other_idx and not cli_ref._clarify_freetext:
|
||||
other_label_mand = '❯ Other (type your answer)'
|
||||
elif cli_ref._clarify_freetext:
|
||||
other_label_mand = '❯ Other (type below)'
|
||||
other_num = other_idx + 1
|
||||
if other_num < 10:
|
||||
other_num_prefix = str(other_num)
|
||||
elif other_num == 10:
|
||||
other_num_prefix = '0'
|
||||
else:
|
||||
other_label_mand = ' Other (type your answer)'
|
||||
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
|
||||
other_num_prefix = ' '
|
||||
if selected == other_idx and not cli_ref._clarify_freetext:
|
||||
other_label_mand = f'❯ {other_num_prefix}. Other (type your answer)'
|
||||
elif cli_ref._clarify_freetext:
|
||||
other_label_mand = f'❯ {other_num_prefix}. Other (type below)'
|
||||
else:
|
||||
other_label_mand = f' {other_num_prefix}. Other (type your answer)'
|
||||
other_wrapped = _wrap_panel_text(other_label_mand, inner_text_width, subsequent_indent=" ")
|
||||
elif cli_ref._clarify_freetext:
|
||||
# Freetext-only mode: the guidance line takes the place of choices.
|
||||
other_wrapped = _wrap_panel_text(
|
||||
|
|
@ -9872,6 +10025,15 @@ class HermesCLI:
|
|||
|
||||
# "Other" option (trailing row(s), only shown when choices exist)
|
||||
other_idx = len(choices)
|
||||
# Calculate number prefix for "Other" option
|
||||
other_num = other_idx + 1
|
||||
if other_num < 10:
|
||||
other_num_prefix = str(other_num)
|
||||
elif other_num == 10:
|
||||
other_num_prefix = '0'
|
||||
else:
|
||||
other_num_prefix = ' '
|
||||
|
||||
if selected == other_idx and not cli_ref._clarify_freetext:
|
||||
other_style = 'class:clarify-selected'
|
||||
elif cli_ref._clarify_freetext:
|
||||
|
|
|
|||
|
|
@ -252,7 +252,11 @@ def _send_media_via_adapter(adapter, chat_id: str, media_files: list, metadata:
|
|||
coro = adapter.send_document(chat_id=chat_id, file_path=media_path, metadata=metadata)
|
||||
|
||||
future = asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
result = future.result(timeout=30)
|
||||
try:
|
||||
result = future.result(timeout=30)
|
||||
except TimeoutError:
|
||||
future.cancel()
|
||||
raise
|
||||
if result and not getattr(result, "success", True):
|
||||
logger.warning(
|
||||
"Job '%s': media send failed for %s: %s",
|
||||
|
|
@ -382,7 +386,11 @@ def _deliver_result(job: dict, content: str, adapters=None, loop=None) -> Option
|
|||
runtime_adapter.send(chat_id, text_to_send, metadata=send_metadata),
|
||||
loop,
|
||||
)
|
||||
send_result = future.result(timeout=60)
|
||||
try:
|
||||
send_result = future.result(timeout=60)
|
||||
except TimeoutError:
|
||||
future.cancel()
|
||||
raise
|
||||
if send_result and not getattr(send_result, "success", True):
|
||||
err = getattr(send_result, "error", "unknown")
|
||||
logger.warning(
|
||||
|
|
|
|||
|
|
@ -19,6 +19,8 @@ import uuid
|
|||
from abc import ABC, abstractmethod
|
||||
from urllib.parse import urlsplit
|
||||
|
||||
from utils import normalize_proxy_url
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
|
|
@ -159,13 +161,13 @@ def resolve_proxy_url(platform_env_var: str | None = None) -> str | None:
|
|||
if platform_env_var:
|
||||
value = (os.environ.get(platform_env_var) or "").strip()
|
||||
if value:
|
||||
return value
|
||||
return normalize_proxy_url(value)
|
||||
for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
|
||||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = (os.environ.get(key) or "").strip()
|
||||
if value:
|
||||
return value
|
||||
return _detect_macos_system_proxy()
|
||||
return normalize_proxy_url(value)
|
||||
return normalize_proxy_url(_detect_macos_system_proxy())
|
||||
|
||||
|
||||
def proxy_kwargs_for_bot(proxy_url: str | None) -> dict:
|
||||
|
|
|
|||
|
|
@ -794,8 +794,28 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
# Telegram pushes updates to our HTTP endpoint. This
|
||||
# enables cloud platforms (Fly.io, Railway) to auto-wake
|
||||
# suspended machines on inbound HTTP traffic.
|
||||
#
|
||||
# SECURITY: TELEGRAM_WEBHOOK_SECRET is REQUIRED. Without it,
|
||||
# python-telegram-bot passes secret_token=None and the
|
||||
# webhook endpoint accepts any HTTP POST — attackers can
|
||||
# inject forged updates as if from Telegram. Refuse to
|
||||
# start rather than silently run in fail-open mode.
|
||||
# See GHSA-3vpc-7q5r-276h.
|
||||
webhook_port = int(os.getenv("TELEGRAM_WEBHOOK_PORT", "8443"))
|
||||
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip() or None
|
||||
webhook_secret = os.getenv("TELEGRAM_WEBHOOK_SECRET", "").strip()
|
||||
if not webhook_secret:
|
||||
raise RuntimeError(
|
||||
"TELEGRAM_WEBHOOK_SECRET is required when "
|
||||
"TELEGRAM_WEBHOOK_URL is set. Without it, the "
|
||||
"webhook endpoint accepts forged updates from "
|
||||
"anyone who can reach it — see "
|
||||
"https://github.com/NousResearch/hermes-agent/"
|
||||
"security/advisories/GHSA-3vpc-7q5r-276h.\n\n"
|
||||
"Generate a secret and set it in your .env:\n"
|
||||
" export TELEGRAM_WEBHOOK_SECRET=\"$(openssl rand -hex 32)\"\n\n"
|
||||
"Then register it with Telegram when setting the "
|
||||
"webhook via setWebhook's secret_token parameter."
|
||||
)
|
||||
from urllib.parse import urlparse
|
||||
webhook_path = urlparse(webhook_url).path or "/telegram"
|
||||
|
||||
|
|
@ -2333,10 +2353,16 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
DMs remain unrestricted. Group/supergroup messages are accepted when:
|
||||
- the chat is explicitly allowlisted in ``free_response_chats``
|
||||
- ``require_mention`` is disabled
|
||||
- the message is a command
|
||||
- the message replies to the bot
|
||||
- the bot is @mentioned
|
||||
- the text/caption matches a configured regex wake-word pattern
|
||||
|
||||
When ``require_mention`` is enabled, slash commands are not given
|
||||
special treatment — they must pass the same mention/reply checks
|
||||
as any other group message. Users can still trigger commands via
|
||||
the Telegram bot menu (``/command@botname``) or by explicitly
|
||||
mentioning the bot (``@botname /command``), both of which are
|
||||
recognised as mentions by :meth:`_message_mentions_bot`.
|
||||
"""
|
||||
if not self._is_group_chat(message):
|
||||
return True
|
||||
|
|
@ -2351,8 +2377,6 @@ class TelegramAdapter(BasePlatformAdapter):
|
|||
return True
|
||||
if not self._telegram_require_mention():
|
||||
return True
|
||||
if is_command:
|
||||
return True
|
||||
if self._is_reply_to_bot(message):
|
||||
return True
|
||||
if self._message_mentions_bot(message):
|
||||
|
|
|
|||
103
gateway/run.py
103
gateway/run.py
|
|
@ -30,6 +30,8 @@ from pathlib import Path
|
|||
from datetime import datetime
|
||||
from typing import Dict, Optional, Any, List
|
||||
|
||||
from agent.account_usage import fetch_account_usage, render_account_usage_lines
|
||||
|
||||
# --- Agent cache tuning ---------------------------------------------------
|
||||
# Bounds the per-session AIAgent cache to prevent unbounded growth in
|
||||
# long-lived gateways (each AIAgent holds LLM clients, tool schemas,
|
||||
|
|
@ -279,6 +281,7 @@ from gateway.session import (
|
|||
build_session_context,
|
||||
build_session_context_prompt,
|
||||
build_session_key,
|
||||
is_shared_multi_user_session,
|
||||
)
|
||||
from gateway.delivery import DeliveryRouter
|
||||
from gateway.platforms.base import (
|
||||
|
|
@ -3791,12 +3794,12 @@ class GatewayRunner:
|
|||
history = history or []
|
||||
message_text = event.text or ""
|
||||
|
||||
_is_shared_thread = (
|
||||
source.chat_type != "dm"
|
||||
and source.thread_id
|
||||
and not getattr(self.config, "thread_sessions_per_user", False)
|
||||
_is_shared_multi_user = is_shared_multi_user_session(
|
||||
source,
|
||||
group_sessions_per_user=getattr(self.config, "group_sessions_per_user", True),
|
||||
thread_sessions_per_user=getattr(self.config, "thread_sessions_per_user", False),
|
||||
)
|
||||
if _is_shared_thread and source.user_name:
|
||||
if _is_shared_multi_user and source.user_name:
|
||||
message_text = f"[{source.user_name}] {message_text}"
|
||||
|
||||
if event.media_urls:
|
||||
|
|
@ -7263,6 +7266,38 @@ class GatewayRunner:
|
|||
if cached:
|
||||
agent = cached[0]
|
||||
|
||||
# Resolve provider/base_url/api_key for the account-usage fetch.
|
||||
# Prefer the live agent; fall back to persisted billing data on the
|
||||
# SessionDB row so `/usage` still returns account info between turns
|
||||
# when no agent is resident.
|
||||
provider = getattr(agent, "provider", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
|
||||
base_url = getattr(agent, "base_url", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
|
||||
api_key = getattr(agent, "api_key", None) if agent and agent is not _AGENT_PENDING_SENTINEL else None
|
||||
if not provider and getattr(self, "_session_db", None) is not None:
|
||||
try:
|
||||
_entry_for_billing = self.session_store.get_or_create_session(source)
|
||||
persisted = self._session_db.get_session(_entry_for_billing.session_id) or {}
|
||||
except Exception:
|
||||
persisted = {}
|
||||
provider = provider or persisted.get("billing_provider")
|
||||
base_url = base_url or persisted.get("billing_base_url")
|
||||
|
||||
# Fetch account usage off the event loop so slow provider APIs don't
|
||||
# block the gateway. Failures are non-fatal -- account_lines stays [].
|
||||
account_lines: list[str] = []
|
||||
if provider:
|
||||
try:
|
||||
account_snapshot = await asyncio.to_thread(
|
||||
fetch_account_usage,
|
||||
provider,
|
||||
base_url=base_url,
|
||||
api_key=api_key,
|
||||
)
|
||||
except Exception:
|
||||
account_snapshot = None
|
||||
if account_snapshot:
|
||||
account_lines = render_account_usage_lines(account_snapshot, markdown=True)
|
||||
|
||||
if agent and hasattr(agent, "session_total_tokens") and agent.session_api_calls > 0:
|
||||
lines = []
|
||||
|
||||
|
|
@ -7320,6 +7355,10 @@ class GatewayRunner:
|
|||
if ctx.compression_count:
|
||||
lines.append(f"Compressions: {ctx.compression_count}")
|
||||
|
||||
if account_lines:
|
||||
lines.append("")
|
||||
lines.extend(account_lines)
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
# No agent at all -- check session history for a rough count
|
||||
|
|
@ -7329,12 +7368,18 @@ class GatewayRunner:
|
|||
from agent.model_metadata import estimate_messages_tokens_rough
|
||||
msgs = [m for m in history if m.get("role") in ("user", "assistant") and m.get("content")]
|
||||
approx = estimate_messages_tokens_rough(msgs)
|
||||
return (
|
||||
f"📊 **Session Info**\n"
|
||||
f"Messages: {len(msgs)}\n"
|
||||
f"Estimated context: ~{approx:,} tokens\n"
|
||||
f"_(Detailed usage available after the first agent response)_"
|
||||
)
|
||||
lines = [
|
||||
"📊 **Session Info**",
|
||||
f"Messages: {len(msgs)}",
|
||||
f"Estimated context: ~{approx:,} tokens",
|
||||
"_(Detailed usage available after the first agent response)_",
|
||||
]
|
||||
if account_lines:
|
||||
lines.append("")
|
||||
lines.extend(account_lines)
|
||||
return "\n".join(lines)
|
||||
if account_lines:
|
||||
return "\n".join(account_lines)
|
||||
return "No usage data available for this session."
|
||||
|
||||
async def _handle_insights_command(self, event: MessageEvent) -> str:
|
||||
|
|
@ -10774,6 +10819,12 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
except (ProcessLookupError, PermissionError, OSError):
|
||||
pass
|
||||
remove_pid_file()
|
||||
# remove_pid_file() is a no-op when the PID doesn't match.
|
||||
# Force-unlink to cover the old-process-crashed case.
|
||||
try:
|
||||
(get_hermes_home() / "gateway.pid").unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
# Clean up any takeover marker the old process didn't consume
|
||||
# (e.g. SIGKILL'd before its shutdown handler could read it).
|
||||
try:
|
||||
|
|
@ -10912,6 +10963,30 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
else:
|
||||
logger.info("Skipping signal handlers (not running in main thread).")
|
||||
|
||||
# Claim the PID file BEFORE bringing up any platform adapters.
|
||||
# This closes the --replace race window: two concurrent `gateway run
|
||||
# --replace` invocations both pass the termination-wait above, but
|
||||
# only the winner of the O_CREAT|O_EXCL race below will ever open
|
||||
# Telegram polling, Discord gateway sockets, etc. The loser exits
|
||||
# cleanly before touching any external service.
|
||||
import atexit
|
||||
from gateway.status import write_pid_file, remove_pid_file, get_running_pid
|
||||
_current_pid = get_running_pid()
|
||||
if _current_pid is not None and _current_pid != os.getpid():
|
||||
logger.error(
|
||||
"Another gateway instance (PID %d) started during our startup. "
|
||||
"Exiting to avoid double-running.", _current_pid
|
||||
)
|
||||
return False
|
||||
try:
|
||||
write_pid_file()
|
||||
except FileExistsError:
|
||||
logger.error(
|
||||
"PID file race lost to another gateway instance. Exiting."
|
||||
)
|
||||
return False
|
||||
atexit.register(remove_pid_file)
|
||||
|
||||
# Start the gateway
|
||||
success = await runner.start()
|
||||
if not success:
|
||||
|
|
@ -10921,12 +10996,6 @@ async def start_gateway(config: Optional[GatewayConfig] = None, replace: bool =
|
|||
logger.error("Gateway exiting cleanly: %s", runner.exit_reason)
|
||||
return True
|
||||
|
||||
# Write PID file so CLI can detect gateway is running
|
||||
import atexit
|
||||
from gateway.status import write_pid_file, remove_pid_file
|
||||
write_pid_file()
|
||||
atexit.register(remove_pid_file)
|
||||
|
||||
# Start background cron ticker so scheduled jobs fire automatically.
|
||||
# Pass the event loop so cron delivery can use live adapters (E2EE support).
|
||||
cron_stop = threading.Event()
|
||||
|
|
|
|||
|
|
@ -152,6 +152,7 @@ class SessionContext:
|
|||
source: SessionSource
|
||||
connected_platforms: List[Platform]
|
||||
home_channels: Dict[Platform, HomeChannel]
|
||||
shared_multi_user_session: bool = False
|
||||
|
||||
# Session metadata
|
||||
session_key: str = ""
|
||||
|
|
@ -166,6 +167,7 @@ class SessionContext:
|
|||
"home_channels": {
|
||||
p.value: hc.to_dict() for p, hc in self.home_channels.items()
|
||||
},
|
||||
"shared_multi_user_session": self.shared_multi_user_session,
|
||||
"session_key": self.session_key,
|
||||
"session_id": self.session_id,
|
||||
"created_at": self.created_at.isoformat() if self.created_at else None,
|
||||
|
|
@ -240,18 +242,16 @@ def build_session_context_prompt(
|
|||
lines.append(f"**Channel Topic:** {context.source.chat_topic}")
|
||||
|
||||
# User identity.
|
||||
# In shared thread sessions (non-DM with thread_id), multiple users
|
||||
# contribute to the same conversation. Don't pin a single user name
|
||||
# in the system prompt — it changes per-turn and would bust the prompt
|
||||
# cache. Instead, note that this is a multi-user thread; individual
|
||||
# sender names are prefixed on each user message by the gateway.
|
||||
_is_shared_thread = (
|
||||
context.source.chat_type != "dm"
|
||||
and context.source.thread_id
|
||||
)
|
||||
if _is_shared_thread:
|
||||
# In shared multi-user sessions (shared threads OR shared non-thread groups
|
||||
# when group_sessions_per_user=False), multiple users contribute to the same
|
||||
# conversation. Don't pin a single user name in the system prompt — it
|
||||
# changes per-turn and would bust the prompt cache. Instead, note that
|
||||
# this is a multi-user session; individual sender names are prefixed on
|
||||
# each user message by the gateway.
|
||||
if context.shared_multi_user_session:
|
||||
session_label = "Multi-user thread" if context.source.thread_id else "Multi-user session"
|
||||
lines.append(
|
||||
"**Session type:** Multi-user thread — messages are prefixed "
|
||||
f"**Session type:** {session_label} — messages are prefixed "
|
||||
"with [sender name]. Multiple users may participate."
|
||||
)
|
||||
elif context.source.user_name:
|
||||
|
|
@ -467,6 +467,27 @@ class SessionEntry:
|
|||
)
|
||||
|
||||
|
||||
def is_shared_multi_user_session(
    source: SessionSource,
    *,
    group_sessions_per_user: bool = True,
    thread_sessions_per_user: bool = False,
) -> bool:
    """Return True when a non-DM session is shared across participants.

    Mirrors the isolation rules in :func:`build_session_key`:

    - DMs are never shared.
    - Threads are shared unless ``thread_sessions_per_user`` is True.
    - Non-thread group/channel sessions are shared unless
      ``group_sessions_per_user`` is True (default: True = isolated).

    Args:
        source: The message source; only ``chat_type`` and ``thread_id``
            are read.
        group_sessions_per_user: When True, each participant of a
            non-thread group/channel gets their own session.
        thread_sessions_per_user: When True, each participant of a thread
            gets their own session.

    Returns:
        True if several users contribute to the same session, else False.
    """
    if source.chat_type == "dm":
        return False
    # A falsy thread_id (None or "") means "not in a thread", so the
    # group-level setting decides.
    if source.thread_id:
        return not thread_sessions_per_user
    return not group_sessions_per_user
|
||||
|
||||
|
||||
def build_session_key(
|
||||
source: SessionSource,
|
||||
group_sessions_per_user: bool = True,
|
||||
|
|
@ -1238,6 +1259,11 @@ def build_session_context(
|
|||
source=source,
|
||||
connected_platforms=connected,
|
||||
home_channels=home_channels,
|
||||
shared_multi_user_session=is_shared_multi_user_session(
|
||||
source,
|
||||
group_sessions_per_user=getattr(config, "group_sessions_per_user", True),
|
||||
thread_sessions_per_user=getattr(config, "thread_sessions_per_user", False),
|
||||
),
|
||||
)
|
||||
|
||||
if session_entry:
|
||||
|
|
|
|||
|
|
@ -225,8 +225,28 @@ def _cleanup_invalid_pid_path(pid_path: Path, *, cleanup_stale: bool) -> None:
|
|||
|
||||
|
||||
def write_pid_file() -> None:
    """Write the current process PID and metadata to the gateway PID file.

    Uses atomic ``O_CREAT | O_EXCL`` creation so that concurrent --replace
    invocations race: exactly one process wins and the rest get
    ``FileExistsError``.

    Raises:
        FileExistsError: The PID file already exists, i.e. another gateway
            is racing us. Propagated so the caller can decide what to do.
        Exception: Any error while writing the record is re-raised after a
            best-effort removal of the partially written file.
    """
    path = _get_pid_path()
    path.parent.mkdir(parents=True, exist_ok=True)
    record = json.dumps(_build_pid_record())

    # O_EXCL makes creation fail if the file exists, which is what settles
    # the race. The explicit 0o644 avoids os.open's default mode of 0o777,
    # which would leave the PID file with execute bits set.
    fd = os.open(path, os.O_CREAT | os.O_EXCL | os.O_WRONLY, 0o644)
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            f.write(record)
    except Exception:
        # Don't leave a half-written PID file that would block later starts.
        try:
            path.unlink(missing_ok=True)
        except OSError:
            pass
        raise
|
||||
|
||||
|
||||
def write_runtime_status(
|
||||
|
|
|
|||
|
|
@ -152,6 +152,23 @@ def auth_add_command(args) -> None:
|
|||
|
||||
pool = load_pool(provider)
|
||||
|
||||
# Clear ALL suppressions for this provider — re-adding a credential is
|
||||
# a strong signal the user wants auth re-enabled. This covers env:*
|
||||
# (shell-exported vars), gh_cli (copilot), claude_code, qwen-cli,
|
||||
# device_code (codex), etc. One consistent re-engagement pattern.
|
||||
# Matches the Codex device_code re-link pattern that predates this.
|
||||
if not provider.startswith(CUSTOM_POOL_PREFIX):
|
||||
try:
|
||||
from hermes_cli.auth import (
|
||||
_load_auth_store,
|
||||
unsuppress_credential_source,
|
||||
)
|
||||
suppressed = _load_auth_store().get("suppressed_sources", {})
|
||||
for src in list(suppressed.get(provider, []) or []):
|
||||
unsuppress_credential_source(provider, src)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if requested_type == AUTH_TYPE_API_KEY:
|
||||
token = (getattr(args, "api_key", None) or "").strip()
|
||||
if not token:
|
||||
|
|
@ -338,71 +355,28 @@ def auth_remove_command(args) -> None:
|
|||
raise SystemExit(f'No credential matching "{target}" for provider {provider}.')
|
||||
print(f"Removed {provider} credential #{index} ({removed.label})")
|
||||
|
||||
# If this was an env-seeded credential, also clear the env var from .env
|
||||
# so it doesn't get re-seeded on the next load_pool() call.
|
||||
if removed.source.startswith("env:"):
|
||||
env_var = removed.source[len("env:"):]
|
||||
if env_var:
|
||||
from hermes_cli.config import remove_env_value
|
||||
cleared = remove_env_value(env_var)
|
||||
if cleared:
|
||||
print(f"Cleared {env_var} from .env")
|
||||
# Unified removal dispatch. Every credential source Hermes reads from
|
||||
# (env vars, external OAuth files, auth.json blocks, custom config)
|
||||
# has a RemovalStep registered in agent.credential_sources. The step
|
||||
# handles its source-specific cleanup and we centralise suppression +
|
||||
# user-facing output here so every source behaves identically from
|
||||
# the user's perspective.
|
||||
from agent.credential_sources import find_removal_step
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
|
||||
# If this was a singleton-seeded credential (OAuth device_code, hermes_pkce),
|
||||
# clear the underlying auth store / credential file so it doesn't get
|
||||
# re-seeded on the next load_pool() call.
|
||||
elif provider == "openai-codex" and (
|
||||
removed.source == "device_code" or removed.source.endswith(":device_code")
|
||||
):
|
||||
# Codex tokens live in TWO places: the Hermes auth store and
|
||||
# ~/.codex/auth.json (the Codex CLI shared file). On every refresh,
|
||||
# refresh_codex_oauth_pure() writes to both. So clearing only the
|
||||
# Hermes auth store is not enough — _seed_from_singletons() will
|
||||
# auto-import from ~/.codex/auth.json on the next load_pool() and
|
||||
# the removal is instantly undone. Mark the source as suppressed
|
||||
# so auto-import is skipped; leave ~/.codex/auth.json untouched so
|
||||
# the Codex CLI itself keeps working.
|
||||
from hermes_cli.auth import (
|
||||
_load_auth_store, _save_auth_store, _auth_store_lock,
|
||||
suppress_credential_source,
|
||||
)
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
providers_dict = auth_store.get("providers")
|
||||
if isinstance(providers_dict, dict) and provider in providers_dict:
|
||||
del providers_dict[provider]
|
||||
_save_auth_store(auth_store)
|
||||
print(f"Cleared {provider} OAuth tokens from auth store")
|
||||
suppress_credential_source(provider, "device_code")
|
||||
print("Suppressed openai-codex device_code source — it will not be re-seeded.")
|
||||
print("Note: Codex CLI credentials still live in ~/.codex/auth.json")
|
||||
print("Run `hermes auth add openai-codex` to re-enable if needed.")
|
||||
step = find_removal_step(provider, removed.source)
|
||||
if step is None:
|
||||
# Unregistered source — e.g. "manual", which has nothing external
|
||||
# to clean up. The pool entry is already gone; we're done.
|
||||
return
|
||||
|
||||
elif removed.source == "device_code" and provider == "nous":
|
||||
from hermes_cli.auth import (
|
||||
_load_auth_store, _save_auth_store, _auth_store_lock,
|
||||
)
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
providers_dict = auth_store.get("providers")
|
||||
if isinstance(providers_dict, dict) and provider in providers_dict:
|
||||
del providers_dict[provider]
|
||||
_save_auth_store(auth_store)
|
||||
print(f"Cleared {provider} OAuth tokens from auth store")
|
||||
|
||||
elif removed.source == "hermes_pkce" and provider == "anthropic":
|
||||
from hermes_constants import get_hermes_home
|
||||
oauth_file = get_hermes_home() / ".anthropic_oauth.json"
|
||||
if oauth_file.exists():
|
||||
oauth_file.unlink()
|
||||
print("Cleared Hermes Anthropic OAuth credentials")
|
||||
|
||||
elif removed.source == "claude_code" and provider == "anthropic":
|
||||
from hermes_cli.auth import suppress_credential_source
|
||||
suppress_credential_source(provider, "claude_code")
|
||||
print("Suppressed claude_code credential — it will not be re-seeded.")
|
||||
print("Note: Claude Code credentials still live in ~/.claude/.credentials.json")
|
||||
print("Run `hermes auth add anthropic` to re-enable if needed.")
|
||||
result = step.remove_fn(provider, removed)
|
||||
for line in result.cleaned:
|
||||
print(line)
|
||||
if result.suppress:
|
||||
suppress_credential_source(provider, removed.source)
|
||||
for line in result.hints:
|
||||
print(line)
|
||||
|
||||
|
||||
def auth_reset_command(args) -> None:
|
||||
|
|
|
|||
|
|
@ -738,6 +738,26 @@ DEFAULT_CONFIG: _DefaultConfig = {
|
|||
# (terminal and execute_code). Skill-declared required_environment_variables
|
||||
# are passed through automatically; this list is for non-skill use cases.
|
||||
"env_passthrough": [],
|
||||
# Extra files to source in the login shell when building the
|
||||
# per-session environment snapshot. Use this when tools like nvm,
|
||||
# pyenv, asdf, or custom PATH entries are registered by files that
|
||||
# a bash login shell would skip — most commonly ``~/.bashrc``
|
||||
# (bash doesn't source bashrc in non-interactive login mode) or
|
||||
# zsh-specific files like ``~/.zshrc`` / ``~/.zprofile``.
|
||||
# Paths support ``~`` / ``${VAR}``. Missing files are silently
|
||||
# skipped. When empty, Hermes auto-appends ``~/.bashrc`` if the
|
||||
# snapshot shell is bash (this is the ``auto_source_bashrc``
|
||||
# behaviour — disable with that key if you want strict login-only
|
||||
# semantics).
|
||||
"shell_init_files": [],
|
||||
# When true (default), Hermes sources ``~/.bashrc`` in the login
|
||||
# shell used to build the environment snapshot. This captures
|
||||
# PATH additions, shell functions, and aliases defined in the
|
||||
# user's bashrc — which a plain ``bash -l -c`` would otherwise
|
||||
# miss because bash skips bashrc in non-interactive login mode.
|
||||
# Turn this off if you have a bashrc that misbehaves when sourced
|
||||
# non-interactively (e.g. one that hard-exits on TTY checks).
|
||||
"auto_source_bashrc": True,
|
||||
"docker_image": "nikolaik/python-nodejs:python3.11-nodejs20",
|
||||
"docker_forward_env": [],
|
||||
# Explicit environment variables to set inside Docker containers.
|
||||
|
|
@ -996,6 +1016,7 @@ DEFAULT_CONFIG: _DefaultConfig = {
|
|||
"record_key": "ctrl+b",
|
||||
"max_recording_seconds": 120,
|
||||
"auto_tts": False,
|
||||
"beep_enabled": True, # Play record start/stop beeps in CLI voice mode
|
||||
"silence_threshold": 200, # RMS below this = silence (0-32767)
|
||||
"silence_duration": 3.0, # Seconds of silence before auto-stop
|
||||
},
|
||||
|
|
@ -1054,6 +1075,20 @@ DEFAULT_CONFIG: _DefaultConfig = {
|
|||
# always goes to ~/.hermes/skills/.
|
||||
"skills": {
|
||||
"external_dirs": [], # e.g. ["~/.agents/skills", "/shared/team-skills"]
|
||||
# Substitute ${HERMES_SKILL_DIR} and ${HERMES_SESSION_ID} in SKILL.md
|
||||
# content with the absolute skill directory and the active session id
|
||||
# before the agent sees it. Lets skill authors reference bundled
|
||||
# scripts without the agent having to join paths.
|
||||
"template_vars": True,
|
||||
# Pre-execute inline shell snippets written as !`cmd` in SKILL.md
|
||||
# body. Their stdout is inlined into the skill message before the
|
||||
# agent reads it, so skills can inject dynamic context (dates, git
|
||||
# state, detected tool versions, …). Off by default because any
|
||||
# content from the skill author runs on the host without approval;
|
||||
# only enable for skill sources you trust.
|
||||
"inline_shell": False,
|
||||
# Timeout (seconds) for each !`cmd` snippet when inline_shell is on.
|
||||
"inline_shell_timeout": 10,
|
||||
},
|
||||
|
||||
# Honcho AI-native memory -- reads ~/.honcho/config.json as single source of truth.
|
||||
|
|
@ -1200,7 +1235,7 @@ DEFAULT_CONFIG: _DefaultConfig = {
|
|||
},
|
||||
|
||||
# Config schema version - bump this when adding new required fields
|
||||
"_config_version": 21,
|
||||
"_config_version": 22,
|
||||
}
|
||||
|
||||
# =============================================================================
|
||||
|
|
|
|||
|
|
@ -1327,7 +1327,6 @@ def cmd_whatsapp(args):
|
|||
except (EOFError, KeyboardInterrupt):
|
||||
response = "n"
|
||||
if response.lower() in ("y", "yes"):
|
||||
|
||||
shutil.rmtree(session_dir, ignore_errors=True)
|
||||
session_dir.mkdir(parents=True, exist_ok=True)
|
||||
print(" ✓ Session cleared")
|
||||
|
|
@ -5213,7 +5212,9 @@ def _install_hangup_protection(gateway_mode: bool = False):
|
|||
# (2) Mirror output to update.log and wrap stdio for broken-pipe
|
||||
# tolerance. Any failure here is non-fatal; we just skip the wrap.
|
||||
try:
|
||||
logs_dir = get_hermes_home() / "logs"
|
||||
from hermes_cli.config import get_hermes_home as _get_hermes_home
|
||||
|
||||
logs_dir = _get_hermes_home() / "logs"
|
||||
logs_dir.mkdir(parents=True, exist_ok=True)
|
||||
log_path = logs_dir / "update.log"
|
||||
log_file = open(log_path, "a", buffering=1, encoding="utf-8")
|
||||
|
|
|
|||
|
|
@ -292,6 +292,7 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"big-pickle",
|
||||
],
|
||||
"opencode-go": [
|
||||
"kimi-k2.6",
|
||||
"kimi-k2.5",
|
||||
"glm-5.1",
|
||||
"glm-5",
|
||||
|
|
@ -299,6 +300,8 @@ _PROVIDER_MODELS: dict[str, list[str]] = {
|
|||
"mimo-v2-omni",
|
||||
"minimax-m2.7",
|
||||
"minimax-m2.5",
|
||||
"qwen3.6-plus",
|
||||
"qwen3.5-plus",
|
||||
],
|
||||
"kilocode": [
|
||||
"anthropic/claude-opus-4.6",
|
||||
|
|
@ -685,6 +688,31 @@ def _openrouter_model_is_free(pricing: Any) -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def _openrouter_model_supports_tools(item: Any) -> bool:
|
||||
"""Return True when the model's ``supported_parameters`` advertise tool calling.
|
||||
|
||||
hermes-agent is tool-calling-first — every provider path assumes the model
|
||||
can invoke tools. Models that don't advertise ``tools`` in their
|
||||
``supported_parameters`` (e.g. image-only or completion-only models) cannot
|
||||
be driven by the agent loop and would fail at the first tool call.
|
||||
|
||||
**Permissive when the field is missing.** Some OpenRouter-compatible gateways
|
||||
(Nous Portal, private mirrors, older catalog snapshots) don't populate
|
||||
``supported_parameters`` at all. Treat that as "unknown capability → allow"
|
||||
so the picker doesn't silently empty for those users. Only hide models
|
||||
whose ``supported_parameters`` is an explicit list that omits ``tools``.
|
||||
|
||||
Ported from Kilo-Org/kilocode#9068.
|
||||
"""
|
||||
if not isinstance(item, dict):
|
||||
return True
|
||||
params = item.get("supported_parameters")
|
||||
if not isinstance(params, list):
|
||||
# Field absent / malformed / None — be permissive.
|
||||
return True
|
||||
return "tools" in params
|
||||
|
||||
|
||||
def fetch_openrouter_models(
|
||||
timeout: float = 8.0,
|
||||
*,
|
||||
|
|
@ -727,6 +755,11 @@ def fetch_openrouter_models(
|
|||
live_item = live_by_id.get(preferred_id)
|
||||
if live_item is None:
|
||||
continue
|
||||
# Hide models that don't advertise tool-calling support — hermes-agent
|
||||
# requires it and surfacing them leads to immediate runtime failures
|
||||
# when the user selects them. Ported from Kilo-Org/kilocode#9068.
|
||||
if not _openrouter_model_supports_tools(live_item):
|
||||
continue
|
||||
desc = "free" if _openrouter_model_is_free(live_item.get("pricing")) else ""
|
||||
curated.append((preferred_id, desc))
|
||||
|
||||
|
|
@ -2393,13 +2426,70 @@ def validate_requested_model(
|
|||
except Exception:
|
||||
pass # Fall through to generic warning
|
||||
|
||||
# Static-catalog fallback: when the /models probe was unreachable,
|
||||
# validate against the curated list from provider_model_ids() — same
|
||||
# pattern as the openai-codex and minimax branches above. This fixes
|
||||
# /model switches in the gateway for providers like opencode-go and
|
||||
# opencode-zen whose /models endpoint returns 404 against the HTML
|
||||
# marketing site. Without this block, validate_requested_model would
|
||||
# reject every model on such providers, switch_model() would return
|
||||
# success=False, and the gateway would never write to
|
||||
# _session_model_overrides.
|
||||
provider_label = _PROVIDER_LABELS.get(normalized, normalized)
|
||||
try:
|
||||
catalog_models = provider_model_ids(normalized)
|
||||
except Exception:
|
||||
catalog_models = []
|
||||
|
||||
if catalog_models:
|
||||
catalog_lower = {m.lower(): m for m in catalog_models}
|
||||
if requested_for_lookup.lower() in catalog_lower:
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"message": None,
|
||||
}
|
||||
catalog_lower_list = list(catalog_lower.keys())
|
||||
auto = get_close_matches(
|
||||
requested_for_lookup.lower(), catalog_lower_list, n=1, cutoff=0.9
|
||||
)
|
||||
if auto:
|
||||
corrected = catalog_lower[auto[0]]
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": True,
|
||||
"corrected_model": corrected,
|
||||
"message": f"Auto-corrected `{requested}` → `{corrected}`",
|
||||
}
|
||||
suggestions = get_close_matches(
|
||||
requested_for_lookup.lower(), catalog_lower_list, n=3, cutoff=0.5
|
||||
)
|
||||
suggestion_text = ""
|
||||
if suggestions:
|
||||
suggestion_text = "\n Similar models: " + ", ".join(
|
||||
f"`{catalog_lower[s]}`" for s in suggestions
|
||||
)
|
||||
return {
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": (
|
||||
f"Note: `{requested}` was not found in the {provider_label} curated catalog "
|
||||
f"and the /models endpoint was unreachable.{suggestion_text}"
|
||||
f"\n The model may still work if it exists on the provider."
|
||||
),
|
||||
}
|
||||
|
||||
# No catalog available — accept with a warning, matching the comment's
|
||||
# stated intent ("Accept and persist, but warn").
|
||||
return {
|
||||
"accepted": False,
|
||||
"persist": False,
|
||||
"accepted": True,
|
||||
"persist": True,
|
||||
"recognized": False,
|
||||
"message": (
|
||||
f"Could not reach the {provider_label} API to validate `{requested}`. "
|
||||
f"Note: could not reach the {provider_label} API to validate `{requested}`. "
|
||||
f"If the service isn't down, this model may not be valid."
|
||||
),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ from hermes_cli.auth import get_nous_auth_status
|
|||
from hermes_cli.config import get_env_value, load_config
|
||||
from tools.managed_tool_gateway import is_managed_tool_gateway_ready
|
||||
from tools.tool_backend_helpers import (
|
||||
fal_key_is_configured,
|
||||
has_direct_modal_credentials,
|
||||
managed_nous_tools_enabled,
|
||||
normalize_browser_cloud_provider,
|
||||
|
|
@ -271,7 +272,7 @@ def get_nous_subscription_features(
|
|||
direct_firecrawl = bool(get_env_value("FIRECRAWL_API_KEY") or get_env_value("FIRECRAWL_API_URL"))
|
||||
direct_parallel = bool(get_env_value("PARALLEL_API_KEY"))
|
||||
direct_tavily = bool(get_env_value("TAVILY_API_KEY"))
|
||||
direct_fal = bool(get_env_value("FAL_KEY"))
|
||||
direct_fal = fal_key_is_configured()
|
||||
direct_openai_tts = bool(resolve_openai_audio_api_key())
|
||||
direct_elevenlabs = bool(get_env_value("ELEVENLABS_API_KEY"))
|
||||
direct_camofox = bool(get_env_value("CAMOFOX_URL"))
|
||||
|
|
@ -520,7 +521,7 @@ def apply_nous_managed_defaults(
|
|||
browser_cfg["cloud_provider"] = "browser-use"
|
||||
changed.add("browser")
|
||||
|
||||
if "image_gen" in selected_toolsets and not get_env_value("FAL_KEY"):
|
||||
if "image_gen" in selected_toolsets and not fal_key_is_configured():
|
||||
changed.add("image_gen")
|
||||
|
||||
return changed
|
||||
|
|
@ -548,7 +549,7 @@ def _get_gateway_direct_credentials() -> Dict[str, bool]:
|
|||
or get_env_value("TAVILY_API_KEY")
|
||||
or get_env_value("EXA_API_KEY")
|
||||
),
|
||||
"image_gen": bool(get_env_value("FAL_KEY")),
|
||||
"image_gen": fal_key_is_configured(),
|
||||
"tts": bool(
|
||||
resolve_openai_audio_api_key()
|
||||
or get_env_value("ELEVENLABS_API_KEY")
|
||||
|
|
|
|||
|
|
@ -492,8 +492,12 @@ def _resolve_openrouter_runtime(
|
|||
else:
|
||||
# Custom endpoint: use api_key from config when using config base_url (#1760).
|
||||
# When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
|
||||
# the canonical env var for ollama.com authentication.
|
||||
_is_ollama_url = "ollama.com" in base_url.lower()
|
||||
# the canonical env var for ollama.com authentication. Match on
|
||||
# HOST, not substring — a custom base_url whose path contains
|
||||
# "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
|
||||
# hostname is a look-alike (ollama.com.attacker.test) must not
|
||||
# receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
|
||||
_is_ollama_url = base_url_host_matches(base_url, "ollama.com")
|
||||
api_key_candidates = [
|
||||
explicit_api_key,
|
||||
(cfg_api_key if use_config_base_url else ""),
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ _DEFAULT_PROVIDER_MODELS = {
|
|||
"ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"],
|
||||
"kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"],
|
||||
"opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"],
|
||||
"opencode-go": ["glm-5.1", "glm-5", "kimi-k2.5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7"],
|
||||
"opencode-go": ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.5", "minimax-m2.7", "qwen3.6-plus", "qwen3.5-plus"],
|
||||
"huggingface": [
|
||||
"Qwen/Qwen3.5-397B-A17B", "Qwen/Qwen3-235B-A22B-Thinking-2507",
|
||||
"Qwen/Qwen3-Coder-480B-A35B-Instruct", "deepseek-ai/DeepSeek-R1-0528",
|
||||
|
|
@ -441,6 +441,16 @@ def _print_setup_summary(config: dict, hermes_home):
|
|||
tool_status.append(("Text-to-Speech (NeuTTS local)", True, None))
|
||||
else:
|
||||
tool_status.append(("Text-to-Speech (NeuTTS — not installed)", False, "run 'hermes setup tts'"))
|
||||
elif tts_provider == "kittentts":
|
||||
try:
|
||||
import importlib.util
|
||||
kittentts_ok = importlib.util.find_spec("kittentts") is not None
|
||||
except Exception:
|
||||
kittentts_ok = False
|
||||
if kittentts_ok:
|
||||
tool_status.append(("Text-to-Speech (KittenTTS local)", True, None))
|
||||
else:
|
||||
tool_status.append(("Text-to-Speech (KittenTTS — not installed)", False, "run 'hermes setup tts'"))
|
||||
else:
|
||||
tool_status.append(("Text-to-Speech (Edge TTS)", True, None))
|
||||
|
||||
|
|
@ -901,6 +911,31 @@ def _install_neutts_deps() -> bool:
|
|||
return False
|
||||
|
||||
|
||||
def _install_kittentts_deps() -> bool:
    """Pip-install the KittenTTS wheel and ``soundfile`` into this interpreter.

    This function does not prompt; any confirmation has to be obtained by
    the caller before invoking it.

    Returns:
        True when pip exits successfully, False when pip exits non-zero or
        does not finish within the 300 second timeout.
    """
    import subprocess
    import sys

    wheel_url = (
        "https://github.com/KittenML/KittenTTS/releases/download/"
        "0.8.1/kittentts-0.8.1-py3-none-any.whl"
    )
    # Run pip through the running interpreter so the package lands in the
    # same environment this process is using.
    pip_command = [
        sys.executable, "-m", "pip", "install", "-U",
        wheel_url, "soundfile", "--quiet",
    ]

    print()
    print_info("Installing kittentts Python package (~25-80MB model downloaded on first use)...")
    print()

    try:
        subprocess.run(pip_command, check=True, timeout=300)
    except (subprocess.CalledProcessError, subprocess.TimeoutExpired) as e:
        print_error(f"Failed to install kittentts: {e}")
        print_info(f"Try manually: python -m pip install -U '{wheel_url}' soundfile")
        return False
    else:
        print_success("kittentts installed successfully")
        return True
|
||||
|
||||
|
||||
def _setup_tts_provider(config: dict):
|
||||
"""Interactive TTS provider selection with install flow for NeuTTS."""
|
||||
tts_config = config.get("tts", {})
|
||||
|
|
@ -916,6 +951,7 @@ def _setup_tts_provider(config: dict):
|
|||
"mistral": "Mistral Voxtral TTS",
|
||||
"gemini": "Google Gemini TTS",
|
||||
"neutts": "NeuTTS",
|
||||
"kittentts": "KittenTTS",
|
||||
}
|
||||
current_label = provider_labels.get(current_provider, current_provider)
|
||||
|
||||
|
|
@ -939,9 +975,10 @@ def _setup_tts_provider(config: dict):
|
|||
"Mistral Voxtral TTS (multilingual, native Opus, needs API key)",
|
||||
"Google Gemini TTS (30 prebuilt voices, prompt-controllable, needs API key)",
|
||||
"NeuTTS (local on-device, free, ~300MB model download)",
|
||||
"KittenTTS (local on-device, free, lightweight ~25-80MB ONNX)",
|
||||
]
|
||||
)
|
||||
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts"])
|
||||
providers.extend(["edge", "elevenlabs", "openai", "xai", "minimax", "mistral", "gemini", "neutts", "kittentts"])
|
||||
choices.append(f"Keep current ({current_label})")
|
||||
keep_current_idx = len(choices) - 1
|
||||
idx = prompt_choice("Select TTS provider:", choices, keep_current_idx)
|
||||
|
|
@ -1060,6 +1097,29 @@ def _setup_tts_provider(config: dict):
|
|||
print_warning("No API key provided. Falling back to Edge TTS.")
|
||||
selected = "edge"
|
||||
|
||||
elif selected == "kittentts":
|
||||
# Check if already installed
|
||||
try:
|
||||
import importlib.util
|
||||
already_installed = importlib.util.find_spec("kittentts") is not None
|
||||
except Exception:
|
||||
already_installed = False
|
||||
|
||||
if already_installed:
|
||||
print_success("KittenTTS is already installed")
|
||||
else:
|
||||
print()
|
||||
print_info("KittenTTS is lightweight (~25-80MB, CPU-only, no API key required).")
|
||||
print_info("Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
|
||||
print()
|
||||
if prompt_yes_no("Install KittenTTS now?", True):
|
||||
if not _install_kittentts_deps():
|
||||
print_warning("KittenTTS installation incomplete. Falling back to Edge TTS.")
|
||||
selected = "edge"
|
||||
else:
|
||||
print_info("Skipping install. Set tts.provider to 'kittentts' after installing manually.")
|
||||
selected = "edge"
|
||||
|
||||
# Save the selection
|
||||
if "tts" not in config:
|
||||
config["tts"] = {}
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ from hermes_cli.nous_subscription import (
|
|||
apply_nous_managed_defaults,
|
||||
get_nous_subscription_features,
|
||||
)
|
||||
from tools.tool_backend_helpers import managed_nous_tools_enabled
|
||||
from tools.tool_backend_helpers import fal_key_is_configured, managed_nous_tools_enabled
|
||||
from utils import base_url_hostname
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -182,6 +182,14 @@ TOOL_CATEGORIES = {
|
|||
],
|
||||
"tts_provider": "gemini",
|
||||
},
|
||||
{
|
||||
"name": "KittenTTS",
|
||||
"badge": "local · free",
|
||||
"tag": "Lightweight local ONNX TTS (~25MB), no API key",
|
||||
"env_vars": [],
|
||||
"tts_provider": "kittentts",
|
||||
"post_setup": "kittentts",
|
||||
},
|
||||
],
|
||||
},
|
||||
"web": {
|
||||
|
|
@ -423,6 +431,36 @@ def _run_post_setup(post_setup_key: str):
|
|||
_print_warning(" Node.js not found. Install Camofox via Docker:")
|
||||
_print_info(" docker run -p 9377:9377 -e CAMOFOX_PORT=9377 jo-inc/camofox-browser")
|
||||
|
||||
elif post_setup_key == "kittentts":
|
||||
try:
|
||||
__import__("kittentts")
|
||||
_print_success(" kittentts is already installed")
|
||||
return
|
||||
except ImportError:
|
||||
pass
|
||||
import subprocess
|
||||
_print_info(" Installing kittentts (~25-80MB model, CPU-only)...")
|
||||
wheel_url = (
|
||||
"https://github.com/KittenML/KittenTTS/releases/download/"
|
||||
"0.8.1/kittentts-0.8.1-py3-none-any.whl"
|
||||
)
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[sys.executable, "-m", "pip", "install", "-U", wheel_url, "soundfile", "--quiet"],
|
||||
capture_output=True, text=True, timeout=300,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
_print_success(" kittentts installed")
|
||||
_print_info(" Voices: Jasper, Bella, Luna, Bruno, Rosie, Hugo, Kiki, Leo")
|
||||
_print_info(" Models: KittenML/kitten-tts-nano-0.8-int8 (25MB), micro (41MB), mini (80MB)")
|
||||
else:
|
||||
_print_warning(" kittentts install failed:")
|
||||
_print_info(f" {result.stderr.strip()[:300]}")
|
||||
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
|
||||
except subprocess.TimeoutExpired:
|
||||
_print_warning(" kittentts install timed out (>5min)")
|
||||
_print_info(f" Run manually: python -m pip install -U '{wheel_url}' soundfile")
|
||||
|
||||
elif post_setup_key == "rl_training":
|
||||
try:
|
||||
__import__("tinker_atropos")
|
||||
|
|
@ -833,7 +871,7 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool:
|
|||
browser_cfg = config.get("browser", {})
|
||||
return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg
|
||||
if ts_key == "image_gen":
|
||||
return not get_env_value("FAL_KEY")
|
||||
return not fal_key_is_configured()
|
||||
|
||||
return not _toolset_has_keys(ts_key, config)
|
||||
|
||||
|
|
|
|||
|
|
@ -114,6 +114,91 @@ def _require_token(request: Request) -> None:
|
|||
raise HTTPException(status_code=401, detail="Unauthorized")
|
||||
|
||||
|
||||
# Accepted Host header values for loopback binds. DNS rebinding attacks
|
||||
# point a victim browser at an attacker-controlled hostname (evil.test)
|
||||
# which resolves to 127.0.0.1 after a TTL flip — bypassing same-origin
|
||||
# checks because the browser now considers evil.test and our dashboard
|
||||
# "same origin". Validating the Host header at the app layer rejects any
|
||||
# request whose Host isn't one we bound for. See GHSA-ppp5-vxwm-4cf7.
|
||||
_LOOPBACK_HOST_VALUES: frozenset = frozenset({
|
||||
"localhost", "127.0.0.1", "::1",
|
||||
})
|
||||
|
||||
|
||||
def _is_accepted_host(host_header: str, bound_host: str) -> bool:
|
||||
"""True if the Host header targets the interface we bound to.
|
||||
|
||||
Accepts:
|
||||
- Exact bound host (with or without port suffix)
|
||||
- Loopback aliases when bound to loopback
|
||||
- Any host when bound to 0.0.0.0 (explicit opt-in to non-loopback,
|
||||
no protection possible at this layer)
|
||||
"""
|
||||
if not host_header:
|
||||
return False
|
||||
# Strip port suffix. IPv6 addresses use bracket notation:
|
||||
# [::1] — no port
|
||||
# [::1]:9119 — with port
|
||||
# Plain hosts/v4:
|
||||
# localhost:9119
|
||||
# 127.0.0.1:9119
|
||||
h = host_header.strip()
|
||||
if h.startswith("["):
|
||||
# IPv6 bracketed — port (if any) follows "]:"
|
||||
close = h.find("]")
|
||||
if close != -1:
|
||||
host_only = h[1:close] # strip brackets
|
||||
else:
|
||||
host_only = h.strip("[]")
|
||||
else:
|
||||
host_only = h.rsplit(":", 1)[0] if ":" in h else h
|
||||
host_only = host_only.lower()
|
||||
|
||||
# 0.0.0.0 bind means operator explicitly opted into all-interfaces
|
||||
# (requires --insecure per web_server.start_server). No Host-layer
|
||||
# defence can protect that mode; rely on operator network controls.
|
||||
if bound_host in ("0.0.0.0", "::"):
|
||||
return True
|
||||
|
||||
# Loopback bind: accept the loopback names
|
||||
bound_lc = bound_host.lower()
|
||||
if bound_lc in _LOOPBACK_HOST_VALUES:
|
||||
return host_only in _LOOPBACK_HOST_VALUES
|
||||
|
||||
# Explicit non-loopback bind: require exact host match
|
||||
return host_only == bound_lc
|
||||
|
||||
|
||||
@app.middleware("http")
async def host_header_middleware(request: Request, call_next):
    """Answer with HTTP 400 when the Host header is not one we bound for.

    This is the DNS-rebinding defence: a browser that has been steered to
    an attacker hostname which later resolves to 127.0.0.1 will treat that
    hostname as same-origin with the dashboard, so origin-based checks do
    not help. The Host header still carries the attacker's hostname, which
    is what gets rejected here.

    See GHSA-ppp5-vxwm-4cf7.
    """
    # start_server() records the listen host on app.state; while it is
    # unset there is nothing to validate against, so pass the request on.
    expected_host = getattr(app.state, "bound_host", None)
    if not expected_host:
        return await call_next(request)

    if _is_accepted_host(request.headers.get("host", ""), expected_host):
        return await call_next(request)

    rejection_detail = (
        "Invalid Host header. Dashboard requests must use "
        "the hostname the server was bound to."
    )
    return JSONResponse(
        status_code=400,
        content={"detail": rejection_detail},
    )
|
||||
|
||||
|
||||
@app.middleware("http")
|
||||
async def auth_middleware(request: Request, call_next):
|
||||
"""Require the session token on all /api/ routes except the public list."""
|
||||
|
|
@ -2323,6 +2408,10 @@ def start_server(
|
|||
"authentication. Only use on trusted networks.", host,
|
||||
)
|
||||
|
||||
# Record the bound host so host_header_middleware can validate incoming
|
||||
# Host headers against it. Defends against DNS rebinding (GHSA-ppp5-vxwm-4cf7).
|
||||
app.state.bound_host = host
|
||||
|
||||
if open_browser:
|
||||
import webbrowser
|
||||
|
||||
|
|
|
|||
3
optional-skills/dogfood/DESCRIPTION.md
Normal file
3
optional-skills/dogfood/DESCRIPTION.md
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
# Dogfood — Advanced QA & Testing Skills
|
||||
|
||||
Specialized QA workflows that go beyond basic bug-finding. These skills use structured methodologies to surface UX friction, accessibility issues, and product-level problems that standard testing misses.
|
||||
190
optional-skills/dogfood/adversarial-ux-test/SKILL.md
Normal file
190
optional-skills/dogfood/adversarial-ux-test/SKILL.md
Normal file
|
|
@ -0,0 +1,190 @@
|
|||
---
|
||||
name: adversarial-ux-test
|
||||
description: Roleplay the most difficult, tech-resistant user for your product. Browse the app as that persona, find every UX pain point, then filter complaints through a pragmatism layer to separate real problems from noise. Creates actionable tickets from genuine issues only.
|
||||
version: 1.0.0
|
||||
author: Omni @ Comelse
|
||||
license: MIT
|
||||
metadata:
|
||||
hermes:
|
||||
tags: [qa, ux, testing, adversarial, dogfood, personas, user-testing]
|
||||
related_skills: [dogfood]
|
||||
---
|
||||
|
||||
# Adversarial UX Test
|
||||
|
||||
Roleplay the worst-case user for your product — the person who hates technology, doesn't want your software, and will find every reason to complain. Then filter their feedback through a pragmatism layer to separate real UX problems from "I hate computers" noise.
|
||||
|
||||
Think of it as an automated "mom test" — but angry.
|
||||
|
||||
## Why This Works
|
||||
|
||||
Most QA finds bugs. This finds **friction**. A technically correct app can still be unusable for real humans. The adversarial persona catches:
|
||||
- Confusing terminology that makes sense to developers but not users
|
||||
- Too many steps to accomplish basic tasks
|
||||
- Missing onboarding or "aha moments"
|
||||
- Accessibility issues (font size, contrast, click targets)
|
||||
- Cold-start problems (empty states, no demo content)
|
||||
- Paywall/signup friction that kills conversion
|
||||
|
||||
The **pragmatism filter** (Step 4) is what makes this useful instead of just entertaining. Without it, you'd add a "print this page" button to every screen because Grandpa can't figure out PDFs.
|
||||
|
||||
## How to Use
|
||||
|
||||
Tell the agent:
|
||||
```
|
||||
"Run an adversarial UX test on [URL]"
|
||||
"Be a grumpy [persona type] and test [app name]"
|
||||
"Do an asshole user test on my staging site"
|
||||
```
|
||||
|
||||
You can provide a persona or let the agent generate one based on your product's target audience.
|
||||
|
||||
## Step 1: Define the Persona
|
||||
|
||||
If no persona is provided, generate one by answering:
|
||||
|
||||
1. **Who is the HARDEST user for this product?** (age 50+, non-technical role, decades of experience doing it "the old way")
|
||||
2. **What is their tech comfort level?** (the lower the better — WhatsApp-only, paper notebooks, wife set up their email)
|
||||
3. **What is the ONE thing they need to accomplish?** (their core job, not your feature list)
|
||||
4. **What would make them give up?** (too many clicks, jargon, slow, confusing)
|
||||
5. **How do they talk when frustrated?** (blunt, sweary, dismissive, sighing)
|
||||
|
||||
### Good Persona Example
|
||||
> **"Big Mick" McAllister** — 58-year-old S&C coach. Uses WhatsApp and that's it. His "spreadsheet" is a paper notebook. "If I can't figure it out in 10 seconds I'm going back to my notebook." Needs to log session results for 25 players. Hates small text, jargon, and passwords.
|
||||
|
||||
### Bad Persona Example
|
||||
> "A user who doesn't like the app" — too vague, no constraints, no voice.
|
||||
|
||||
The persona must be **specific enough to stay in character** for 20 minutes of testing.
|
||||
|
||||
## Step 2: Become the Asshole (Browse as the Persona)
|
||||
|
||||
1. Read any available project docs for app context and URLs
|
||||
2. **Fully inhabit the persona** — their frustrations, limitations, goals
|
||||
3. Navigate to the app using browser tools
|
||||
4. **Attempt the persona's ACTUAL TASKS** (not a feature tour):
|
||||
- Can they do what they came to do?
|
||||
- How many clicks/screens to accomplish it?
|
||||
- What confuses them?
|
||||
- What makes them angry?
|
||||
- Where do they get lost?
|
||||
- What would make them give up and go back to their old way?
|
||||
|
||||
5. Test these friction categories:
|
||||
- **First impression** — would they even bother past the landing page?
|
||||
- **Core workflow** — the ONE thing they need to do most often
|
||||
- **Error recovery** — what happens when they do something wrong?
|
||||
- **Readability** — text size, contrast, information density
|
||||
- **Speed** — does it feel faster than their current method?
|
||||
- **Terminology** — any jargon they wouldn't understand?
|
||||
- **Navigation** — can they find their way back? do they know where they are?
|
||||
|
||||
6. Take screenshots of every pain point
|
||||
7. Check browser console for JS errors on every page
|
||||
|
||||
## Step 3: The Rant (Write Feedback in Character)
|
||||
|
||||
Write the feedback AS THE PERSONA — in their voice, with their frustrations. This is not a bug report. This is a real human venting.
|
||||
|
||||
```
|
||||
[PERSONA NAME]'s Review of [PRODUCT]
|
||||
|
||||
Overall: [Would they keep using it? Yes/No/Maybe with conditions]
|
||||
|
||||
THE GOOD (grudging admission):
|
||||
- [things even they have to admit work]
|
||||
|
||||
THE BAD (legitimate UX issues):
|
||||
- [real problems that would stop them from using the product]
|
||||
|
||||
THE UGLY (showstoppers):
|
||||
- [things that would make them uninstall/cancel immediately]
|
||||
|
||||
SPECIFIC COMPLAINTS:
|
||||
1. [Page/feature]: "[quote in persona voice]" — [what happened, expected]
|
||||
2. ...
|
||||
|
||||
VERDICT: "[one-line persona quote summarizing their experience]"
|
||||
```
|
||||
|
||||
## Step 4: The Pragmatism Filter (Critical — Do Not Skip)
|
||||
|
||||
Step OUT of the persona. Evaluate each complaint as a product person:
|
||||
|
||||
- **RED: REAL UX BUG** — Any user would have this problem, not just grumpy ones. Fix it.
|
||||
- **YELLOW: VALID BUT LOW PRIORITY** — Real issue but only for extreme users. Note it.
|
||||
- **WHITE: PERSONA NOISE** — "I hate computers" talking, not a product problem. Skip it.
|
||||
- **GREEN: FEATURE REQUEST** — Good idea hidden in the complaint. Consider it.
|
||||
|
||||
### Filter Criteria
|
||||
1. Would a 35-year-old competent-but-busy user have the same complaint? → RED
|
||||
2. Is this a genuine accessibility issue (font size, contrast, click targets)? → RED
|
||||
3. Is this "I want it to work like paper" resistance to digital? → WHITE
|
||||
4. Is this a real workflow inefficiency the persona stumbled on? → YELLOW or RED
|
||||
5. Would fixing this add complexity for the 80% who are fine? → WHITE
|
||||
6. Does the complaint reveal a missing onboarding moment? → GREEN
|
||||
|
||||
**This filter is MANDATORY.** Never ship raw persona complaints as tickets.
|
||||
|
||||
## Step 5: Create Tickets
|
||||
|
||||
For **RED** and **GREEN** items only:
|
||||
- Clear, actionable title
|
||||
- Include the persona's verbatim quote (entertaining + memorable)
|
||||
- The real UX issue underneath (objective)
|
||||
- A suggested fix (actionable)
|
||||
- Tag/label: "ux-review"
|
||||
|
||||
For **YELLOW** items: one catch-all ticket with all notes.
|
||||
|
||||
**WHITE** items appear in the report only. No tickets.
|
||||
|
||||
**Max 10 tickets per session** — focus on the worst issues.
|
||||
|
||||
## Step 6: Report
|
||||
|
||||
Deliver:
|
||||
1. The persona rant (Step 3) — entertaining and visceral
|
||||
2. The filtered assessment (Step 4) — pragmatic and actionable
|
||||
3. Tickets created (Step 5) — with links
|
||||
4. Screenshots of key issues
|
||||
|
||||
## Tips
|
||||
|
||||
- **One persona per session.** Don't mix perspectives.
|
||||
- **Stay in character during Steps 2-3.** Break character only at Step 4.
|
||||
- **Test the CORE WORKFLOW first.** Don't get distracted by settings pages.
|
||||
- **Empty states are gold.** New user experience reveals the most friction.
|
||||
- **The best findings are RED items the persona found accidentally** while trying to do something else.
|
||||
- **If the persona has zero complaints, your persona is too tech-savvy.** Make them older, less patient, more set in their ways.
|
||||
- **Run this before demos, launches, or after shipping a batch of features.**
|
||||
- **Register as a NEW user when possible.** Don't use pre-seeded admin accounts — the cold start experience is where most friction lives.
|
||||
- **Zero WHITE items is a signal, not a failure.** If the pragmatism filter finds no noise, your product has real UX problems, not just a grumpy persona.
|
||||
- **Check known issues in project docs AFTER the test.** If the persona found a bug that's already in the known issues list, that's actually the most damning finding — it means the team knew about it but never felt the user's pain.
|
||||
- **Subscription/paywall testing is critical.** Test with expired accounts, not just active ones. The "what happens when you can't pay" experience reveals whether the product respects users or holds their data hostage.
|
||||
- **Count the clicks to accomplish the persona's ONE task.** If it's more than 5, that's almost always a RED finding regardless of persona tech level.
|
||||
|
||||
## Example Personas by Industry
|
||||
|
||||
These are starting points — customize for your specific product:
|
||||
|
||||
| Product Type | Persona | Age | Key Trait |
|
||||
|-------------|---------|-----|-----------|
|
||||
| CRM | Retirement home director | 68 | Filing cabinet is the current CRM |
|
||||
| Photography SaaS | Rural wedding photographer | 62 | Books clients by phone, invoices on paper |
|
||||
| AI/ML Tool | Department store buyer | 55 | Burned by 3 failed tech startups |
|
||||
| Fitness App | Old-school gym coach | 58 | Paper notebook, thick fingers, bad eyes |
|
||||
| Accounting | Family bakery owner | 64 | Shoebox of receipts, hates subscriptions |
|
||||
| E-commerce | Market stall vendor | 60 | Cash only, smartphone is for calls |
|
||||
| Healthcare | Senior GP | 63 | Dictates notes, nurse handles the computer |
|
||||
| Education | Veteran teacher | 57 | Chalk and talk, worksheets in ring binders |
|
||||
|
||||
## Rules
|
||||
|
||||
- Stay in character during Steps 2-3
|
||||
- Be genuinely mean but fair — find real problems, not manufactured ones
|
||||
- The pragmatism filter (Step 4) is **MANDATORY**
|
||||
- Screenshots required for every complaint
|
||||
- Max 10 tickets per session
|
||||
- Test on staging/deployed app, not local dev
|
||||
- One persona, one session, one report
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
# NOTE: This file is maintained for convenience only.
|
||||
# The canonical dependency list is in pyproject.toml.
|
||||
# Preferred install: pip install -e ".[all]"
|
||||
|
||||
# Core dependencies
|
||||
openai
|
||||
python-dotenv
|
||||
fire
|
||||
httpx
|
||||
rich
|
||||
tenacity
|
||||
prompt_toolkit
|
||||
pyyaml
|
||||
requests
|
||||
jinja2
|
||||
pydantic>=2.0
|
||||
PyJWT[crypto]
|
||||
debugpy
|
||||
|
||||
# Web tools
|
||||
firecrawl-py
|
||||
parallel-web>=0.4.2
|
||||
|
||||
# Image generation
|
||||
fal-client
|
||||
|
||||
# Text-to-speech (Edge TTS is free, no API key needed)
|
||||
edge-tts
|
||||
|
||||
# Optional: For cron expression parsing (cronjob scheduling)
|
||||
croniter
|
||||
|
||||
# Optional: For messaging platform integrations (gateway)
|
||||
python-telegram-bot[webhooks]>=22.6
|
||||
discord.py>=2.0
|
||||
aiohttp>=3.9.0
|
||||
232
run_agent.py
232
run_agent.py
|
|
@ -127,7 +127,7 @@ from agent.trajectory import (
|
|||
convert_scratchpad_to_think, has_incomplete_scratchpad,
|
||||
save_trajectory as _save_trajectory_to_file,
|
||||
)
|
||||
from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled
|
||||
from utils import atomic_json_write, base_url_host_matches, base_url_hostname, env_var_enabled, normalize_proxy_url
|
||||
|
||||
|
||||
|
||||
|
|
@ -190,7 +190,7 @@ def _get_proxy_from_env() -> Optional[str]:
|
|||
"https_proxy", "http_proxy", "all_proxy"):
|
||||
value = os.environ.get(key, "").strip()
|
||||
if value:
|
||||
return value
|
||||
return normalize_proxy_url(value)
|
||||
return None
|
||||
|
||||
|
||||
|
|
@ -2358,6 +2358,13 @@ class AIAgent:
|
|||
cost reduction as direct Anthropic callers, provided their
|
||||
gateway implements the Anthropic cache_control contract
|
||||
(MiniMax, Zhipu GLM, LiteLLM's Anthropic proxy mode all do).
|
||||
|
||||
Qwen / Alibaba-family models on OpenCode, OpenCode Go, and direct
|
||||
Alibaba (DashScope) also honour Anthropic-style ``cache_control``
|
||||
markers on OpenAI-wire chat completions. Upstream pi-mono #3392 /
|
||||
pi #3393 documented this for opencode-go Qwen. Without markers
|
||||
these providers serve zero cache hits, re-billing the full prompt
|
||||
on every turn.
|
||||
"""
|
||||
eff_provider = (provider if provider is not None else self.provider) or ""
|
||||
eff_base_url = base_url if base_url is not None else (self.base_url or "")
|
||||
|
|
@ -2365,7 +2372,9 @@ class AIAgent:
|
|||
eff_model = (model if model is not None else self.model) or ""
|
||||
|
||||
base_lower = eff_base_url.lower()
|
||||
is_claude = "claude" in eff_model.lower()
|
||||
model_lower = eff_model.lower()
|
||||
provider_lower = eff_provider.lower()
|
||||
is_claude = "claude" in model_lower
|
||||
is_openrouter = base_url_host_matches(eff_base_url, "openrouter.ai")
|
||||
is_anthropic_wire = eff_api_mode == "anthropic_messages"
|
||||
is_native_anthropic = (
|
||||
|
|
@ -2380,6 +2389,22 @@ class AIAgent:
|
|||
if is_anthropic_wire and is_claude:
|
||||
# Third-party Anthropic-compatible gateway.
|
||||
return True, True
|
||||
|
||||
# Qwen/Alibaba on OpenCode (Zen/Go) and native DashScope: OpenAI-wire
|
||||
# transport that accepts Anthropic-style cache_control markers and
|
||||
# rewards them with real cache hits. Without this branch
|
||||
# qwen3.6-plus on opencode-go reports 0% cached tokens and burns
|
||||
# through the subscription on every turn.
|
||||
model_is_qwen = "qwen" in model_lower
|
||||
provider_is_alibaba_family = provider_lower in {
|
||||
"opencode", "opencode-zen", "opencode-go", "alibaba",
|
||||
}
|
||||
if provider_is_alibaba_family and model_is_qwen:
|
||||
# Envelope layout (native_anthropic=False): markers on inner
|
||||
# content parts, not top-level tool messages. Matches
|
||||
# pi-mono's "alibaba" cacheControlFormat.
|
||||
return True, False
|
||||
|
||||
return False, False
|
||||
|
||||
@staticmethod
|
||||
|
|
@ -6126,8 +6151,9 @@ class AIAgent:
|
|||
fb_base_url_hint = (fb.get("base_url") or "").strip() or None
|
||||
fb_api_key_hint = (fb.get("api_key") or "").strip() or None
|
||||
# For Ollama Cloud endpoints, pull OLLAMA_API_KEY from env
|
||||
# when no explicit key is in the fallback config.
|
||||
if fb_base_url_hint and "ollama.com" in fb_base_url_hint.lower() and not fb_api_key_hint:
|
||||
# when no explicit key is in the fallback config. Host match
|
||||
# (not substring) — see GHSA-76xc-57q6-vm5m.
|
||||
if fb_base_url_hint and base_url_host_matches(fb_base_url_hint, "ollama.com") and not fb_api_key_hint:
|
||||
fb_api_key_hint = os.getenv("OLLAMA_API_KEY") or None
|
||||
fb_client, _resolved_fb_model = resolve_provider_client(
|
||||
fb_provider, model=fb_model, raw_codex=True,
|
||||
|
|
@ -6548,6 +6574,15 @@ class AIAgent:
|
|||
return suffix
|
||||
return "[A multimodal message was converted to text for Anthropic compatibility.]"
|
||||
|
||||
def _get_anthropic_transport(self):
|
||||
"""Return the cached AnthropicTransport instance (lazy singleton)."""
|
||||
t = getattr(self, "_anthropic_transport", None)
|
||||
if t is None:
|
||||
from agent.transports import get_transport
|
||||
t = get_transport("anthropic_messages")
|
||||
self._anthropic_transport = t
|
||||
return t
|
||||
|
||||
def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list:
|
||||
if not any(
|
||||
isinstance(msg, dict) and self._content_has_image_parts(msg.get("content"))
|
||||
|
|
@ -6664,20 +6699,14 @@ class AIAgent:
|
|||
def _build_api_kwargs(self, api_messages: list) -> dict:
|
||||
"""Build the keyword arguments dict for the active API mode."""
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs
|
||||
_transport = self._get_anthropic_transport()
|
||||
anthropic_messages = self._prepare_anthropic_messages_for_api(api_messages)
|
||||
# Pass context_length (total input+output window) so the adapter can
|
||||
# clamp max_tokens (output cap) when the user configured a smaller
|
||||
# context window than the model's native output limit.
|
||||
ctx_len = getattr(self, "context_compressor", None)
|
||||
ctx_len = ctx_len.context_length if ctx_len else None
|
||||
# _ephemeral_max_output_tokens is set for one call when the API
|
||||
# returns "max_tokens too large given prompt" — it caps output to
|
||||
# the available window space without touching context_length.
|
||||
ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
|
||||
if ephemeral_out is not None:
|
||||
self._ephemeral_max_output_tokens = None # consume immediately
|
||||
return build_anthropic_kwargs(
|
||||
return _transport.build_kwargs(
|
||||
model=self.model,
|
||||
messages=anthropic_messages,
|
||||
tools=self.tools,
|
||||
|
|
@ -6909,6 +6938,34 @@ class AIAgent:
|
|||
# (the documented max output for qwen3-coder models) so the
|
||||
# model has adequate output budget for tool calls.
|
||||
api_kwargs.update(self._max_tokens_param(65536))
|
||||
elif (
|
||||
base_url_host_matches(self.base_url, "api.kimi.com")
|
||||
or base_url_host_matches(self.base_url, "moonshot.ai")
|
||||
or base_url_host_matches(self.base_url, "moonshot.cn")
|
||||
):
|
||||
# Kimi/Moonshot defaults to a low max_tokens when omitted.
|
||||
# Reasoning tokens share the output budget — without an explicit
|
||||
# value the model can exhaust it on thinking alone, causing
|
||||
# "Response truncated due to output length limit". 32000 matches
|
||||
# Kimi CLI's default (see MoonshotAI/kimi-cli kimi.py generate()).
|
||||
api_kwargs.update(self._max_tokens_param(32000))
|
||||
# Kimi requires reasoning_effort as a top-level chat completions
|
||||
# parameter (not inside extra_body). Mirror Kimi CLI's
|
||||
# with_generation_kwargs(reasoning_effort=...) / with_thinking():
|
||||
# when thinking is disabled, Kimi CLI omits reasoning_effort
|
||||
# entirely (maps to None).
|
||||
_kimi_thinking_off = bool(
|
||||
self.reasoning_config
|
||||
and isinstance(self.reasoning_config, dict)
|
||||
and self.reasoning_config.get("enabled") is False
|
||||
)
|
||||
if not _kimi_thinking_off:
|
||||
_kimi_effort = "medium"
|
||||
if self.reasoning_config and isinstance(self.reasoning_config, dict):
|
||||
_e = (self.reasoning_config.get("effort") or "").strip().lower()
|
||||
if _e in ("low", "medium", "high"):
|
||||
_kimi_effort = _e
|
||||
api_kwargs["reasoning_effort"] = _kimi_effort
|
||||
elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower():
|
||||
# OpenRouter and Nous Portal translate requests to Anthropic's
|
||||
# Messages API, which requires max_tokens as a mandatory field.
|
||||
|
|
@ -6940,6 +6997,24 @@ class AIAgent:
|
|||
extra_body["provider"] = provider_preferences
|
||||
_is_nous = "nousresearch" in self._base_url_lower
|
||||
|
||||
# Kimi/Moonshot API uses extra_body.thinking (separate from the
|
||||
# top-level reasoning_effort) to enable/disable reasoning mode.
|
||||
# Mirror Kimi CLI's with_thinking() behavior exactly — see
|
||||
# MoonshotAI/kimi-cli packages/kosong/src/kosong/chat_provider/kimi.py
|
||||
_is_kimi = (
|
||||
base_url_host_matches(self.base_url, "api.kimi.com")
|
||||
or base_url_host_matches(self.base_url, "moonshot.ai")
|
||||
or base_url_host_matches(self.base_url, "moonshot.cn")
|
||||
)
|
||||
if _is_kimi:
|
||||
_kimi_thinking_enabled = True
|
||||
if self.reasoning_config and isinstance(self.reasoning_config, dict):
|
||||
if self.reasoning_config.get("enabled") is False:
|
||||
_kimi_thinking_enabled = False
|
||||
extra_body["thinking"] = {
|
||||
"type": "enabled" if _kimi_thinking_enabled else "disabled",
|
||||
}
|
||||
|
||||
if self._supports_reasoning_extra_body():
|
||||
if _is_github_models:
|
||||
github_reasoning = self._github_models_reasoning_extra_body()
|
||||
|
|
@ -7362,9 +7437,9 @@ class AIAgent:
|
|||
codex_kwargs["max_output_tokens"] = 5120
|
||||
response = self._run_codex_stream(codex_kwargs)
|
||||
elif not _aux_available and self.api_mode == "anthropic_messages":
|
||||
# Native Anthropic — use the Anthropic client directly
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs as _build_ant_kwargs
|
||||
ant_kwargs = _build_ant_kwargs(
|
||||
# Native Anthropic — use the transport for kwargs
|
||||
_tflush = self._get_anthropic_transport()
|
||||
ant_kwargs = _tflush.build_kwargs(
|
||||
model=self.model, messages=api_messages,
|
||||
tools=[memory_tool_def], max_tokens=5120,
|
||||
reasoning_config=None,
|
||||
|
|
@ -7392,10 +7467,15 @@ class AIAgent:
|
|||
if assistant_msg and assistant_msg.tool_calls:
|
||||
tool_calls = assistant_msg.tool_calls
|
||||
elif self.api_mode == "anthropic_messages" and not _aux_available:
|
||||
from agent.anthropic_adapter import normalize_anthropic_response as _nar_flush
|
||||
_flush_msg, _ = _nar_flush(response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
if _flush_msg and _flush_msg.tool_calls:
|
||||
tool_calls = _flush_msg.tool_calls
|
||||
_tfn = self._get_anthropic_transport()
|
||||
_flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
if _flush_nr and _flush_nr.tool_calls:
|
||||
tool_calls = [
|
||||
SimpleNamespace(
|
||||
id=tc.id, type="function",
|
||||
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
|
||||
) for tc in _flush_nr.tool_calls
|
||||
]
|
||||
elif hasattr(response, "choices") and response.choices:
|
||||
assistant_message = response.choices[0].message
|
||||
if assistant_message.tool_calls:
|
||||
|
|
@ -8455,14 +8535,14 @@ class AIAgent:
|
|||
summary_kwargs["extra_body"] = summary_extra_body
|
||||
|
||||
if self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs as _bak, normalize_anthropic_response as _nar
|
||||
_ant_kw = _bak(model=self.model, messages=api_messages, tools=None,
|
||||
_tsum = self._get_anthropic_transport()
|
||||
_ant_kw = _tsum.build_kwargs(model=self.model, messages=api_messages, tools=None,
|
||||
max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
|
||||
is_oauth=self._is_anthropic_oauth,
|
||||
preserve_dots=self._anthropic_preserve_dots())
|
||||
summary_response = self._anthropic_messages_create(_ant_kw)
|
||||
_msg, _ = _nar(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
final_response = (_msg.content or "").strip()
|
||||
_sum_nr = _tsum.normalize_response(summary_response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
final_response = (_sum_nr.content or "").strip()
|
||||
else:
|
||||
summary_response = self._ensure_primary_openai_client(reason="iteration_limit_summary").chat.completions.create(**summary_kwargs)
|
||||
|
||||
|
|
@ -8487,14 +8567,14 @@ class AIAgent:
|
|||
retry_msg, _ = self._normalize_codex_response(retry_response)
|
||||
final_response = (retry_msg.content or "").strip() if retry_msg else ""
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import build_anthropic_kwargs as _bak2, normalize_anthropic_response as _nar2
|
||||
_ant_kw2 = _bak2(model=self.model, messages=api_messages, tools=None,
|
||||
_tretry = self._get_anthropic_transport()
|
||||
_ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None,
|
||||
is_oauth=self._is_anthropic_oauth,
|
||||
max_tokens=self.max_tokens, reasoning_config=self.reasoning_config,
|
||||
preserve_dots=self._anthropic_preserve_dots())
|
||||
retry_response = self._anthropic_messages_create(_ant_kw2)
|
||||
_retry_msg, _ = _nar2(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
final_response = (_retry_msg.content or "").strip()
|
||||
_retry_nr = _tretry.normalize_response(retry_response, strip_tool_prefix=self._is_anthropic_oauth)
|
||||
final_response = (_retry_nr.content or "").strip()
|
||||
else:
|
||||
summary_kwargs = {
|
||||
"model": self.model,
|
||||
|
|
@ -9363,16 +9443,13 @@ class AIAgent:
|
|||
response_invalid = True
|
||||
error_details.append("response.output is empty")
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
content_blocks = getattr(response, "content", None) if response is not None else None
|
||||
if response is None:
|
||||
_tv = self._get_anthropic_transport()
|
||||
if not _tv.validate_response(response):
|
||||
response_invalid = True
|
||||
error_details.append("response is None")
|
||||
elif not isinstance(content_blocks, list):
|
||||
response_invalid = True
|
||||
error_details.append("response.content is not a list")
|
||||
elif not content_blocks:
|
||||
response_invalid = True
|
||||
error_details.append("response.content is empty")
|
||||
if response is None:
|
||||
error_details.append("response is None")
|
||||
else:
|
||||
error_details.append("response.content invalid (not a non-empty list)")
|
||||
else:
|
||||
if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices:
|
||||
response_invalid = True
|
||||
|
|
@ -9533,8 +9610,8 @@ class AIAgent:
|
|||
else:
|
||||
finish_reason = "stop"
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
stop_reason_map = {"end_turn": "stop", "tool_use": "tool_calls", "max_tokens": "length", "stop_sequence": "stop"}
|
||||
finish_reason = stop_reason_map.get(response.stop_reason, "stop")
|
||||
_tfr = self._get_anthropic_transport()
|
||||
finish_reason = _tfr.map_finish_reason(response.stop_reason)
|
||||
else:
|
||||
finish_reason = response.choices[0].finish_reason
|
||||
assistant_message = response.choices[0].message
|
||||
|
|
@ -9563,10 +9640,24 @@ class AIAgent:
|
|||
if self.api_mode in ("chat_completions", "bedrock_converse"):
|
||||
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import normalize_anthropic_response
|
||||
_trunc_msg, _ = normalize_anthropic_response(
|
||||
_trunc_nr = self._get_anthropic_transport().normalize_response(
|
||||
response, strip_tool_prefix=self._is_anthropic_oauth
|
||||
)
|
||||
_trunc_msg = SimpleNamespace(
|
||||
content=_trunc_nr.content,
|
||||
tool_calls=[
|
||||
SimpleNamespace(
|
||||
id=tc.id, type="function",
|
||||
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
|
||||
) for tc in (_trunc_nr.tool_calls or [])
|
||||
] or None,
|
||||
reasoning=_trunc_nr.reasoning,
|
||||
reasoning_content=None,
|
||||
reasoning_details=(
|
||||
_trunc_nr.provider_data.get("reasoning_details")
|
||||
if _trunc_nr.provider_data else None
|
||||
),
|
||||
)
|
||||
|
||||
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
|
||||
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
|
||||
|
|
@ -9822,21 +9913,27 @@ class AIAgent:
|
|||
if self.verbose_logging:
|
||||
logging.debug(f"Token usage: prompt={usage_dict['prompt_tokens']:,}, completion={usage_dict['completion_tokens']:,}, total={usage_dict['total_tokens']:,}")
|
||||
|
||||
# Log cache hit stats when prompt caching is active
|
||||
if self._use_prompt_caching:
|
||||
if self.api_mode == "anthropic_messages":
|
||||
# Anthropic uses cache_read_input_tokens / cache_creation_input_tokens
|
||||
cached = getattr(response.usage, 'cache_read_input_tokens', 0) or 0
|
||||
written = getattr(response.usage, 'cache_creation_input_tokens', 0) or 0
|
||||
else:
|
||||
# OpenRouter uses prompt_tokens_details.cached_tokens
|
||||
details = getattr(response.usage, 'prompt_tokens_details', None)
|
||||
cached = getattr(details, 'cached_tokens', 0) or 0 if details else 0
|
||||
written = getattr(details, 'cache_write_tokens', 0) or 0 if details else 0
|
||||
prompt = usage_dict["prompt_tokens"]
|
||||
# Surface cache hit stats for any provider that reports
|
||||
# them — not just those where we inject cache_control
|
||||
# markers. OpenAI/Kimi/DeepSeek/Qwen all do automatic
|
||||
# server-side prefix caching and return
|
||||
# ``prompt_tokens_details.cached_tokens``; users
|
||||
# previously could not see their cache % because this
|
||||
# line was gated on ``_use_prompt_caching``, which is
|
||||
# only True for Anthropic-style marker injection.
|
||||
# ``canonical_usage`` is already normalised from all
|
||||
# three API shapes (Anthropic / Codex / OpenAI-chat)
|
||||
# so we can rely on its values directly.
|
||||
cached = canonical_usage.cache_read_tokens
|
||||
written = canonical_usage.cache_write_tokens
|
||||
prompt = usage_dict["prompt_tokens"]
|
||||
if (cached or written) and not self.quiet_mode:
|
||||
hit_pct = (cached / prompt * 100) if prompt > 0 else 0
|
||||
if not self.quiet_mode:
|
||||
self._vprint(f"{self.log_prefix} 💾 Cache: {cached:,}/{prompt:,} tokens ({hit_pct:.0f}% hit, {written:,} written)")
|
||||
self._vprint(
|
||||
f"{self.log_prefix} 💾 Cache: "
|
||||
f"{cached:,}/{prompt:,} tokens "
|
||||
f"({hit_pct:.0f}% hit, {written:,} written)"
|
||||
)
|
||||
|
||||
has_retried_429 = False # Reset on success
|
||||
# Clear Nous rate limit state on successful request —
|
||||
|
|
@ -10772,10 +10869,31 @@ class AIAgent:
|
|||
if self.api_mode == "codex_responses":
|
||||
assistant_message, finish_reason = self._normalize_codex_response(response)
|
||||
elif self.api_mode == "anthropic_messages":
|
||||
from agent.anthropic_adapter import normalize_anthropic_response
|
||||
assistant_message, finish_reason = normalize_anthropic_response(
|
||||
_transport = self._get_anthropic_transport()
|
||||
_nr = _transport.normalize_response(
|
||||
response, strip_tool_prefix=self._is_anthropic_oauth
|
||||
)
|
||||
# Back-compat shim: downstream code expects SimpleNamespace with
|
||||
# .content, .tool_calls, .reasoning, .reasoning_content,
|
||||
# .reasoning_details attributes.
|
||||
assistant_message = SimpleNamespace(
|
||||
content=_nr.content,
|
||||
tool_calls=[
|
||||
SimpleNamespace(
|
||||
id=tc.id,
|
||||
type="function",
|
||||
function=SimpleNamespace(name=tc.name, arguments=tc.arguments),
|
||||
)
|
||||
for tc in (_nr.tool_calls or [])
|
||||
] or None,
|
||||
reasoning=_nr.reasoning,
|
||||
reasoning_content=None,
|
||||
reasoning_details=(
|
||||
_nr.provider_data.get("reasoning_details")
|
||||
if _nr.provider_data else None
|
||||
),
|
||||
)
|
||||
finish_reason = _nr.finish_reason
|
||||
else:
|
||||
assistant_message = response.choices[0].message
|
||||
|
||||
|
|
|
|||
|
|
@ -56,6 +56,8 @@ AUTHOR_MAP = {
|
|||
"185121704+stablegenius49@users.noreply.github.com": "stablegenius49",
|
||||
"101283333+batuhankocyigit@users.noreply.github.com": "batuhankocyigit",
|
||||
"valdi.jorge@gmail.com": "jvcl",
|
||||
"francip@gmail.com": "francip",
|
||||
"omni@comelse.com": "omnissiah-comelse",
|
||||
"oussama.redcode@gmail.com": "mavrickdeveloper",
|
||||
"126368201+vilkasdev@users.noreply.github.com": "vilkasdev",
|
||||
"137614867+cutepawss@users.noreply.github.com": "cutepawss",
|
||||
|
|
@ -95,20 +97,24 @@ AUTHOR_MAP = {
|
|||
"i@troy-y.org": "TroyMitchell911",
|
||||
"mygamez@163.com": "zhongyueming1121",
|
||||
"hansnow@users.noreply.github.com": "hansnow",
|
||||
"134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY",
|
||||
# contributors (manual mapping from git names)
|
||||
"ahmedsherif95@gmail.com": "asheriif",
|
||||
"liujinkun@bytedance.com": "liujinkun2025",
|
||||
"dmayhem93@gmail.com": "dmahan93",
|
||||
"fr@tecompanytea.com": "ifrederico",
|
||||
"cdanis@gmail.com": "cdanis",
|
||||
"samherring99@gmail.com": "samherring99",
|
||||
"desaiaum08@gmail.com": "Aum08Desai",
|
||||
"shannon.sands.1979@gmail.com": "shannonsands",
|
||||
"shannon@nousresearch.com": "shannonsands",
|
||||
"abdi.moya@gmail.com": "AxDSan",
|
||||
"eri@plasticlabs.ai": "Erosika",
|
||||
"hjcpuro@gmail.com": "hjc-puro",
|
||||
"xaydinoktay@gmail.com": "aydnOktay",
|
||||
"abdullahfarukozden@gmail.com": "Farukest",
|
||||
"lovre.pesut@gmail.com": "rovle",
|
||||
"xjtumj@gmail.com": "mengjian-github",
|
||||
"kevinskysunny@gmail.com": "kevinskysunny",
|
||||
"xiewenxuan462@gmail.com": "yule975",
|
||||
"yiweimeng.dlut@hotmail.com": "meng93",
|
||||
|
|
@ -308,6 +314,7 @@ AUTHOR_MAP = {
|
|||
"anthhub@163.com": "anthhub",
|
||||
"shenuu@gmail.com": "shenuu",
|
||||
"xiayh17@gmail.com": "xiayh0107",
|
||||
"zhujianxyz@gmail.com": "opriz",
|
||||
"asurla@nvidia.com": "anniesurla",
|
||||
"limkuan24@gmail.com": "WideLee",
|
||||
"aviralarora002@gmail.com": "AviArora02-commits",
|
||||
|
|
@ -323,6 +330,8 @@ AUTHOR_MAP = {
|
|||
"aniruddhaadak80@users.noreply.github.com": "aniruddhaadak80",
|
||||
"zheng.jerilyn@gmail.com": "jerilynzheng",
|
||||
"asslaenn5@gmail.com": "Aslaaen",
|
||||
"shalompmc0505@naver.com": "pinion05",
|
||||
"105142614+VTRiot@users.noreply.github.com": "VTRiot",
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -372,6 +372,37 @@ async function startSocket() {
|
|||
const app = express();
|
||||
app.use(express.json());
|
||||
|
||||
// Host-header validation — defends against DNS rebinding.
// The bridge binds loopback-only (127.0.0.1) but a victim browser on
// the same machine could be tricked into fetching from an attacker
// hostname that TTL-flips to 127.0.0.1. Reject any request whose Host
// header does not name one of the loopback aliases listed below. The
// check is a plain string comparison; no DNS lookup is performed.
// See GHSA-ppp5-vxwm-4cf7.
const _ACCEPTED_HOST_VALUES = new Set([
  'localhost',
  '127.0.0.1',
  '[::1]',
  '::1',
]);

/**
 * Extract the lower-cased host part of a raw Host header value.
 *
 * Handles "host", "host:port", bracketed IPv6 ("[::1]", "[::1]:3000") and a
 * bare IPv6 literal ("::1"). Splitting on the last colon alone is not enough:
 * it turns a port-less "[::1]" into ":" and wrongly rejects it.
 *
 * @param {string} raw Non-empty Host header value.
 * @returns {string|null} Host without port/brackets, or null when malformed.
 */
function _hostnameFromHostHeader(raw) {
  const value = raw.trim().toLowerCase();

  if (value.startsWith('[')) {
    // Bracketed IPv6 literal, optionally followed by ":port".
    const bracketed = /^\[([^\]]*)\](?::\d*)?$/.exec(value);
    return bracketed ? bracketed[1] : null;
  }

  const firstColon = value.indexOf(':');
  if (firstColon === -1 || firstColon !== value.lastIndexOf(':')) {
    // No port at all, or several colons (unbracketed IPv6 such as "::1"):
    // nothing to strip.
    return value;
  }

  // Exactly one colon: "localhost:3000" → "localhost"
  return value.slice(0, firstColon);
}

app.use((req, res, next) => {
  const raw = (req.headers.host || '').trim();
  if (!raw) {
    return res.status(400).json({ error: 'Missing Host header' });
  }
  const hostOnly = _hostnameFromHostHeader(raw);
  if (hostOnly === null || !_ACCEPTED_HOST_VALUES.has(hostOnly)) {
    return res.status(400).json({
      error: 'Invalid Host header. Bridge accepts loopback hosts only.',
    });
  }
  next();
});
|
||||
|
||||
// Poll for new messages (long-poll style)
|
||||
app.get('/messages', (req, res) => {
|
||||
const msgs = messageQueue.splice(0, messageQueue.length);
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
name: maps
|
||||
description: >
|
||||
Location intelligence — geocode a place, reverse-geocode coordinates,
|
||||
find nearby places (44 POI categories), driving/walking/cycling
|
||||
find nearby places (46 POI categories), driving/walking/cycling
|
||||
distance + time, turn-by-turn directions, timezone lookup, bounding
|
||||
box + area for a named place, and POI search within a rectangle.
|
||||
Uses OpenStreetMap + Overpass + OSRM. Free, no API key.
|
||||
|
|
@ -83,12 +83,13 @@ python3 $MAPS nearby --near "90210" --category pharmacy
|
|||
python3 $MAPS nearby --near "downtown austin" --category restaurant --category bar --limit 10
|
||||
```
|
||||
|
||||
44 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, supermarket,
|
||||
atm, gas_station, parking, museum, park, school, university, bank, police,
|
||||
fire_station, library, airport, train_station, bus_stop, church, mosque,
|
||||
synagogue, dentist, doctor, cinema, theatre, gym, swimming_pool, post_office,
|
||||
convenience_store, bakery, bookshop, laundry, car_wash, car_rental,
|
||||
bicycle_rental, taxi, veterinary, zoo, playground, stadium, nightclub.
|
||||
46 categories: restaurant, cafe, bar, hospital, pharmacy, hotel, guest_house,
|
||||
camp_site, supermarket, atm, gas_station, parking, museum, park, school,
|
||||
university, bank, police, fire_station, library, airport, train_station,
|
||||
bus_stop, church, mosque, synagogue, dentist, doctor, cinema, theatre, gym,
|
||||
swimming_pool, post_office, convenience_store, bakery, bookshop, laundry,
|
||||
car_wash, car_rental, bicycle_rental, taxi, veterinary, zoo, playground,
|
||||
stadium, nightclub.
|
||||
|
||||
Each result includes: `name`, `address`, `lat`/`lon`, `distance_m`,
|
||||
`maps_url` (clickable Google Maps link), `directions_url` (Google Maps
|
||||
|
|
|
|||
|
|
@ -58,7 +58,9 @@ CATEGORY_TAGS = {
|
|||
"restaurant": ("amenity", "restaurant"),
|
||||
"cafe": ("amenity", "cafe"),
|
||||
"bar": ("amenity", "bar"),
|
||||
"bakery": ("shop", "bakery"),
|
||||
# bakery is tagged as shop=bakery in the OSM wiki, but some mappers use
|
||||
# amenity=bakery. Search both so small indie bakeries aren't missed.
|
||||
"bakery": [("shop", "bakery"), ("amenity", "bakery")],
|
||||
"convenience_store": ("shop", "convenience"),
|
||||
# Health
|
||||
"hospital": ("amenity", "hospital"),
|
||||
|
|
@ -68,6 +70,8 @@ CATEGORY_TAGS = {
|
|||
"veterinary": ("amenity", "veterinary"),
|
||||
# Accommodation
|
||||
"hotel": ("tourism", "hotel"),
|
||||
"guest_house": ("tourism", "guest_house"),
|
||||
"camp_site": ("tourism", "camp_site"),
|
||||
# Shopping & Services
|
||||
"supermarket": ("shop", "supermarket"),
|
||||
"bookshop": ("shop", "books"),
|
||||
|
|
@ -120,6 +124,19 @@ RELIGION_FILTER = {
|
|||
|
||||
VALID_CATEGORIES = sorted(CATEGORY_TAGS.keys())
|
||||
|
||||
|
||||
def _tags_for(category):
    """Look up ``category`` in CATEGORY_TAGS and always hand back a list.

    A CATEGORY_TAGS value is either one ``(tag_key, tag_val)`` tuple or,
    for categories tagged under several OSM keys (e.g. ``bakery``), a
    list of such tuples. A list value is shallow-copied; a single tuple
    is wrapped in a one-element list.

    Raises KeyError when ``category`` is not a CATEGORY_TAGS key.
    """
    tags = CATEGORY_TAGS[category]
    return list(tags) if isinstance(tags, list) else [tags]
|
||||
|
||||
OSRM_PROFILES = {
|
||||
"driving": "driving",
|
||||
"walking": "foot",
|
||||
|
|
@ -338,36 +355,63 @@ def geocode_single(query):
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
def build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
                          religion=None, tag_pairs=None):
    """Build an Overpass QL query for nearby POIs around a point.

    Args:
        tag_key: OSM tag key; used only when ``tag_pairs`` is empty or None.
        tag_val: OSM tag value; used only when ``tag_pairs`` is empty or None.
        lat, lon: centre of the search circle.
        radius: value placed in the ``around:`` filter.
        limit: maximum number of elements requested (``out center N``).
        religion: optional value for an extra ``["religion"="..."]`` filter
            applied to every statement.
        tag_pairs: optional list of ``(key, value)`` pairs. When provided,
            the query unions across every pair (used for categories like
            ``bakery`` that are tagged under more than one OSM key).

    Returns:
        The query text: a union of one ``node`` and one ``way`` statement
        per tag pair, followed by ``out center <limit>;``.

    Raises:
        ValueError: if ``tag_pairs`` is empty/None and ``tag_key`` or
            ``tag_val`` is None. Without this check the query would
            silently filter on the literal tag ``"None"="None"``.
    """
    if tag_pairs:
        pairs = list(tag_pairs)
    elif tag_key is not None and tag_val is not None:
        # Back-compat: single key/value pair passed positionally.
        pairs = [(tag_key, tag_val)]
    else:
        raise ValueError(
            "either tag_pairs or both tag_key and tag_val must be provided"
        )

    religion_filter = f'["religion"="{religion}"]' if religion else ""
    area = f'(around:{radius},{lat},{lon});'
    # One node statement and one way statement per tag pair, in pair order.
    body = "\n".join(
        f' {element}["{k}"="{v}"]{religion_filter}{area}'
        for k, v in pairs
        for element in ("node", "way")
    )
    return (
        f'[out:json][timeout:25];\n'
        f'(\n'
        f'{body}\n'
        f');\n'
        f'out center {limit};\n'
    )
|
||||
|
||||
|
||||
def build_overpass_bbox(tag_key, tag_val, south, west, north, east, limit,
                        religion=None, tag_pairs=None):
    """Build an Overpass QL query for POIs within a bounding box.

    Args:
        tag_key: OSM tag key; used only when ``tag_pairs`` is empty or None.
        tag_val: OSM tag value; used only when ``tag_pairs`` is empty or None.
        south, west, north, east: bounding-box edges, emitted in that order.
        limit: maximum number of elements requested (``out center N``).
        religion: optional value for an extra ``["religion"="..."]`` filter
            applied to every statement.
        tag_pairs: optional list of ``(key, value)`` pairs. When provided,
            the query unions across every pair (used for categories that
            are tagged under more than one OSM key).

    Returns:
        The query text: a union of one ``node`` and one ``way`` statement
        per tag pair, followed by ``out center <limit>;``.

    Raises:
        ValueError: if ``tag_pairs`` is empty/None and ``tag_key`` or
            ``tag_val`` is None. Without this check the query would
            silently filter on the literal tag ``"None"="None"``.
    """
    if tag_pairs:
        pairs = list(tag_pairs)
    elif tag_key is not None and tag_val is not None:
        # Back-compat: single key/value pair passed positionally.
        pairs = [(tag_key, tag_val)]
    else:
        raise ValueError(
            "either tag_pairs or both tag_key and tag_val must be provided"
        )

    religion_filter = f'["religion"="{religion}"]' if religion else ""
    area = f'({south},{west},{north},{east});'
    # One node statement and one way statement per tag pair, in pair order.
    body = "\n".join(
        f' {element}["{k}"="{v}"]{religion_filter}{area}'
        for k, v in pairs
        for element in ("node", "way")
    )
    return (
        f'[out:json][timeout:25];\n'
        f'(\n'
        f'{body}\n'
        f');\n'
        f'out center {limit};\n'
    )
|
||||
|
|
@ -605,10 +649,10 @@ def cmd_nearby(args):
|
|||
# appear twice.
|
||||
merged = {}
|
||||
for category in categories:
|
||||
tag_key, tag_val = CATEGORY_TAGS[category]
|
||||
tag_pairs = _tags_for(category)
|
||||
religion = RELIGION_FILTER.get(category)
|
||||
query = build_overpass_nearby(tag_key, tag_val, lat, lon, radius, limit,
|
||||
religion=religion)
|
||||
query = build_overpass_nearby(None, None, lat, lon, radius, limit,
|
||||
religion=religion, tag_pairs=tag_pairs)
|
||||
raw = overpass_query(query)
|
||||
elements = raw.get("elements", [])
|
||||
for place in parse_overpass_elements(elements, ref_lat=lat, ref_lon=lon):
|
||||
|
|
@ -945,10 +989,10 @@ def cmd_bbox(args):
|
|||
if limit <= 0:
|
||||
error_exit("Limit must be a positive integer.")
|
||||
|
||||
tag_key, tag_val = CATEGORY_TAGS[category]
|
||||
tag_pairs = _tags_for(category)
|
||||
religion = RELIGION_FILTER.get(category)
|
||||
query = build_overpass_bbox(tag_key, tag_val, south, west, north, east,
|
||||
limit, religion=religion)
|
||||
query = build_overpass_bbox(None, None, south, west, north, east,
|
||||
limit, religion=religion, tag_pairs=tag_pairs)
|
||||
|
||||
raw = overpass_query(query)
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
---
|
||||
name: xurl
|
||||
description: Interact with X/Twitter via xurl, the official X API CLI. Use for posting, replying, quoting, searching, timelines, mentions, likes, reposts, bookmarks, follows, DMs, media upload, and raw v2 endpoint access.
|
||||
version: 1.1.0
|
||||
version: 1.1.1
|
||||
author: xdevplatform + openclaw + Hermes Agent
|
||||
license: MIT
|
||||
platforms: [linux, macos]
|
||||
|
|
@ -95,6 +95,12 @@ These steps must be performed by the user directly, NOT by the agent, because th
|
|||
xurl auth oauth2 --app my-app
|
||||
```
|
||||
(This opens a browser for the OAuth 2.0 PKCE flow.)
|
||||
|
||||
If X returns a `UsernameNotFound` error or 403 on the post-OAuth `/2/users/me` lookup, pass your handle explicitly (xurl v1.1.0+):
|
||||
```bash
|
||||
xurl auth oauth2 --app my-app YOUR_USERNAME
|
||||
```
|
||||
This binds the token to your handle and skips the broken `/2/users/me` call.
|
||||
6. Set the app as default so all commands use it:
|
||||
```bash
|
||||
xurl auth default my-app
|
||||
|
|
@ -380,6 +386,7 @@ xurl --app staging /2/users/me # one-off against staging
|
|||
| --- | --- | --- |
|
||||
| Auth errors after successful OAuth flow | Token saved to `default` app (no client-id/secret) instead of your named app | `xurl auth oauth2 --app my-app` then `xurl auth default my-app` |
|
||||
| `unauthorized_client` during OAuth | App type set to "Native App" in X dashboard | Change to "Web app, automated app or bot" in User Authentication Settings |
|
||||
| `UsernameNotFound` or 403 on `/2/users/me` right after OAuth | X not returning username reliably from `/2/users/me` | Re-run `xurl auth oauth2 --app my-app YOUR_USERNAME` (xurl v1.1.0+) to pass the handle explicitly |
|
||||
| 401 on every request | Token expired or wrong default app | Check `xurl auth status` — verify `▸` points to an app with oauth2 tokens |
|
||||
| `client-forbidden` / `client-not-enrolled` | X platform enrollment issue | Dashboard → Apps → Manage → Move to "Pay-per-use" package → Production environment |
|
||||
| `CreditsDepleted` | $0 balance on X API | Buy credits (min $5) in Developer Console → Billing |
|
||||
|
|
|
|||
170
tests/acp/test_approval_isolation.py
Normal file
170
tests/acp/test_approval_isolation.py
Normal file
|
|
@ -0,0 +1,170 @@
|
|||
"""Tests for GHSA-96vc-wcxf-jjff and GHSA-qg5c-hvr5-hjgr.
|
||||
|
||||
Two related ACP approval-flow issues:
|
||||
- 96vc: ACP didn't set HERMES_EXEC_ASK, so `check_all_command_guards`
|
||||
took the non-interactive auto-approve path and never consulted the
|
||||
ACP-supplied callback.
|
||||
- qg5c: `_approval_callback` was a module-global in terminal_tool;
|
||||
overlapping ACP sessions overwrote each other's callback slot.
|
||||
|
||||
Both fixed together by:
|
||||
1. Setting HERMES_INTERACTIVE inside _run_agent (wraps the agent call).
|
||||
2. Storing the callback in thread-local state so concurrent executor
|
||||
threads don't collide.
|
||||
"""
|
||||
|
||||
import os
|
||||
import threading
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
class TestThreadLocalApprovalCallback:
    """GHSA-qg5c-hvr5-hjgr: set_approval_callback must be per-thread so
    concurrent ACP sessions don't stomp on each other's handlers."""

    def test_set_and_get_in_same_thread(self):
        """A callback set on a thread is returned by the getter on that thread."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )

        cb1 = lambda cmd, desc: "once"  # noqa: E731
        set_approval_callback(cb1)
        assert _get_approval_callback() is cb1

    def test_callback_not_visible_in_different_thread(self):
        """Thread A's callback is NOT visible to Thread B."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )

        cb_a = lambda cmd, desc: "thread_a"  # noqa: E731
        cb_b = lambda cmd, desc: "thread_b"  # noqa: E731

        seen_in_a = []
        seen_in_b = []

        # Both threads must have registered their callback before either one
        # reads it back. A barrier guarantees that ordering, where a fixed
        # sleep only makes it likely.
        both_registered = threading.Barrier(2)

        def register_then_read(callback, seen):
            set_approval_callback(callback)
            # Timeout so a failure in the peer thread cannot hang the test.
            both_registered.wait(timeout=5)
            seen.append(_get_approval_callback())

        ta = threading.Thread(target=register_then_read, args=(cb_a, seen_in_a))
        tb = threading.Thread(target=register_then_read, args=(cb_b, seen_in_b))
        ta.start()
        tb.start()
        ta.join()
        tb.join()

        # Each thread must see ONLY its own callback — not the other's
        assert seen_in_a == [cb_a]
        assert seen_in_b == [cb_b]

    def test_main_thread_callback_not_leaked_to_worker(self):
        """A callback set in the main thread does NOT leak into a
        freshly-spawned worker thread."""
        from tools.terminal_tool import (
            set_approval_callback,
            _get_approval_callback,
        )

        cb_main = lambda cmd, desc: "main"  # noqa: E731
        set_approval_callback(cb_main)

        worker_saw = []

        def worker():
            worker_saw.append(_get_approval_callback())

        t = threading.Thread(target=worker)
        t.start()
        t.join()

        # Worker thread has no callback set — TLS is empty for it
        assert worker_saw == [None]
        # Main thread still has its callback
        assert _get_approval_callback() is cb_main

    def test_sudo_password_callback_also_thread_local(self):
        """Same protection applies to the sudo password callback."""
        from tools.terminal_tool import (
            set_sudo_password_callback,
            _get_sudo_password_callback,
        )

        cb_main = lambda: "main-password"  # noqa: E731
        set_sudo_password_callback(cb_main)

        worker_saw = []

        def worker():
            worker_saw.append(_get_sudo_password_callback())

        t = threading.Thread(target=worker)
        t.start()
        t.join()

        assert worker_saw == [None]
        assert _get_sudo_password_callback() is cb_main
|
||||
|
||||
|
||||
class TestAcpExecAskGate:
    """GHSA-96vc-wcxf-jjff: ACP's _run_agent must set HERMES_INTERACTIVE so
    that tools.approval.check_all_command_guards takes the CLI-interactive
    path (consults the registered callback via prompt_dangerous_approval)
    instead of the non-interactive auto-approve shortcut.

    (HERMES_EXEC_ASK takes the gateway-queue path which requires a
    notify_cb registered in _gateway_notify_cbs — not applicable to ACP,
    which uses a direct callback shape.)

    NOTE: despite the class name, the variable exercised here is
    HERMES_INTERACTIVE, not HERMES_EXEC_ASK.
    """

    def test_interactive_env_var_routes_to_callback(self, monkeypatch):
        """When HERMES_INTERACTIVE is set and an approval callback is
        registered, a dangerous command must route through the callback.

        The same command is checked twice: once with the variable unset
        (callback must not be called) and once with it set (callback must
        be called).
        """
        # Clean env: drop every variable this test treats as mode-selecting
        # so the first call starts from the non-interactive baseline.
        monkeypatch.delenv("HERMES_INTERACTIVE", raising=False)
        monkeypatch.delenv("HERMES_GATEWAY_SESSION", raising=False)
        monkeypatch.delenv("HERMES_EXEC_ASK", raising=False)
        monkeypatch.delenv("HERMES_YOLO_MODE", raising=False)

        from tools.approval import check_all_command_guards

        # Records every (command, description) the callback is asked about.
        called_with = []

        def fake_cb(command, description, *, allow_permanent=True):
            called_with.append((command, description))
            return "once"

        # Without HERMES_INTERACTIVE: takes auto-approve path, callback NOT called
        result = check_all_command_guards(
            "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
        )
        assert result["approved"] is True
        assert called_with == [], (
            "without HERMES_INTERACTIVE the non-interactive auto-approve "
            "path should fire without consulting the callback"
        )

        # With HERMES_INTERACTIVE: callback IS called, approval flows through it
        monkeypatch.setenv("HERMES_INTERACTIVE", "1")
        called_with.clear()
        result = check_all_command_guards(
            "rm -rf /tmp/test-exec-ask", "local", approval_callback=fake_cb,
        )
        assert called_with, (
            "with HERMES_INTERACTIVE the approval path should consult the "
            "registered callback — this was the ACP bypass in "
            "GHSA-96vc-wcxf-jjff"
        )
        # fake_cb answered "once", so the command ends up approved.
        assert result["approved"] is True
|
||||
|
|
@ -73,3 +73,17 @@ class TestApprovalMapping:
|
|||
result = cb("rm -rf /", "dangerous")
|
||||
|
||||
assert result == "deny"
|
||||
|
||||
def test_approval_none_response_returns_deny(self):
    """When request_permission resolves to None, the callback should return 'deny'."""
    loop = MagicMock(spec=asyncio.AbstractEventLoop)
    mock_rp = MagicMock(name="request_permission")

    # Stand-in for the future returned by run_coroutine_threadsafe; its
    # result() yields None, i.e. no permission response at all.
    future = MagicMock(spec=Future)
    future.result.return_value = None

    # Patch the scheduling call so no real event loop is needed.
    with patch("acp_adapter.permissions.asyncio.run_coroutine_threadsafe", return_value=future):
        cb = make_approval_callback(mock_rp, loop, session_id="s1", timeout=1.0)
        result = cb("echo hi", "demo")

    assert result == "deny"
|
||||
|
|
|
|||
|
|
@ -95,19 +95,37 @@ class TestInitialize:
|
|||
|
||||
class TestAuthenticate:
|
||||
@pytest.mark.asyncio
async def test_authenticate_with_matching_method_id(self, agent, monkeypatch):
    """Authenticating with the id of the detected provider succeeds.

    ``detect_provider`` is stubbed to report ``"openrouter"`` and the same
    id is passed as ``method_id``; the agent must answer with an
    ``AuthenticateResponse``.
    """
    # setattr takes exactly (target, value): patch only detect_provider.
    monkeypatch.setattr(
        "acp_adapter.server.detect_provider",
        lambda: "openrouter",
    )
    resp = await agent.authenticate(method_id="openrouter")
    assert isinstance(resp, AuthenticateResponse)
|
||||
|
||||
@pytest.mark.asyncio
async def test_authenticate_is_case_insensitive(self, agent, monkeypatch):
    """A method id that differs only in letter case is still accepted."""
    monkeypatch.setattr("acp_adapter.server.detect_provider", lambda: "openrouter")
    outcome = await agent.authenticate(method_id="OpenRouter")
    assert isinstance(outcome, AuthenticateResponse)
|
||||
|
||||
@pytest.mark.asyncio
async def test_authenticate_rejects_mismatched_method_id(self, agent, monkeypatch):
    """A method id that does not match the detected provider yields ``None``."""
    monkeypatch.setattr("acp_adapter.server.detect_provider", lambda: "openrouter")
    outcome = await agent.authenticate(method_id="totally-invalid-method")
    assert outcome is None
|
||||
|
||||
@pytest.mark.asyncio
async def test_authenticate_without_provider(self, agent, monkeypatch):
    """With no provider detected, authentication yields ``None``.

    ``detect_provider`` is stubbed to return ``None``; even a plausible
    ``method_id`` must then be rejected.
    """
    # setattr takes exactly (target, value): patch only detect_provider.
    monkeypatch.setattr(
        "acp_adapter.server.detect_provider",
        lambda: None,
    )
    resp = await agent.authenticate(method_id="openrouter")
    assert resp is None
|
||||
|
|
@ -252,6 +270,57 @@ class TestListAndFork:
|
|||
|
||||
mock_list.assert_called_once_with(cwd="/mnt/e/Projects/AI/browser-link-3")
|
||||
|
||||
@pytest.mark.asyncio
async def test_list_sessions_pagination_first_page(self, agent):
    """More sessions than fit on a page: the page is full and has a cursor."""
    from acp_adapter import server as acp_server

    # Five more sessions than a single page holds.
    stored = []
    for index in range(acp_server._LIST_SESSIONS_PAGE_SIZE + 5):
        stored.append(
            {"session_id": f"s{index}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
        )

    with patch.object(agent.session_manager, "list_sessions", return_value=stored):
        page = await agent.list_sessions()

    assert len(page.sessions) == acp_server._LIST_SESSIONS_PAGE_SIZE
    # The cursor is the id of the last session on the returned page.
    assert page.next_cursor == page.sessions[-1].session_id
|
||||
|
||||
@pytest.mark.asyncio
async def test_list_sessions_pagination_no_more(self, agent):
    """Three stored sessions come back in one response with no cursor."""
    stored = []
    for index in range(3):
        stored.append(
            {"session_id": f"s{index}", "cwd": "/tmp", "title": None, "updated_at": 0.0}
        )

    with patch.object(agent.session_manager, "list_sessions", return_value=stored):
        page = await agent.list_sessions()

    assert len(page.sessions) == 3
    assert page.next_cursor is None
|
||||
|
||||
@pytest.mark.asyncio
async def test_list_sessions_cursor_resumes_after_match(self, agent):
    """Passing a cursor returns only the sessions after the matching id."""
    stored = [
        {"session_id": session_id, "cwd": "/tmp", "title": None, "updated_at": 0.0}
        for session_id in ("s1", "s2", "s3")
    ]

    with patch.object(agent.session_manager, "list_sessions", return_value=stored):
        page = await agent.list_sessions(cursor="s1")

    returned_ids = [entry.session_id for entry in page.sessions]
    assert returned_ids == ["s2", "s3"]
    assert page.next_cursor is None
|
||||
|
||||
@pytest.mark.asyncio
async def test_list_sessions_unknown_cursor_returns_empty(self, agent):
    """A cursor matching no stored session gives an empty page, no cursor."""
    stored = [
        {"session_id": session_id, "cwd": "/tmp", "title": None, "updated_at": 0.0}
        for session_id in ("s1", "s2")
    ]

    with patch.object(agent.session_manager, "list_sessions", return_value=stored):
        page = await agent.list_sessions(cursor="does-not-exist")

    assert page.sessions == []
    assert page.next_cursor is None
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# session configuration / model routing
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -414,7 +414,11 @@ class TestRunOauthSetupToken:
|
|||
token = run_oauth_setup_token()
|
||||
|
||||
assert token == "from-cred-file"
|
||||
mock_run.assert_called_once()
|
||||
# Don't assert exact call count — the contract is "credentials flow
|
||||
# through", not "exactly one subprocess call". xdist cross-test
|
||||
# pollution (other tests shimming subprocess via plugins) has flaked
|
||||
# assert_called_once() in CI.
|
||||
assert mock_run.called
|
||||
|
||||
def test_returns_token_from_env_var(self, monkeypatch, tmp_path):
|
||||
"""Falls back to CLAUDE_CODE_OAUTH_TOKEN env var when no cred files."""
|
||||
|
|
|
|||
238
tests/agent/test_anthropic_normalize_v2.py
Normal file
238
tests/agent/test_anthropic_normalize_v2.py
Normal file
|
|
@ -0,0 +1,238 @@
|
|||
"""Regression tests: normalize_anthropic_response_v2 vs v1.
|
||||
|
||||
Constructs mock Anthropic responses and asserts that the v2 function
|
||||
(returning NormalizedResponse) produces identical field values to the
|
||||
original v1 function (returning SimpleNamespace + finish_reason).
|
||||
"""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
|
||||
from agent.anthropic_adapter import (
|
||||
normalize_anthropic_response,
|
||||
normalize_anthropic_response_v2,
|
||||
)
|
||||
from agent.transports.types import NormalizedResponse, ToolCall
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers to build mock Anthropic SDK responses
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def _text_block(text: str):
|
||||
return SimpleNamespace(type="text", text=text)
|
||||
|
||||
|
||||
def _thinking_block(thinking: str, signature: str = "sig_abc"):
|
||||
return SimpleNamespace(type="thinking", thinking=thinking, signature=signature)
|
||||
|
||||
|
||||
def _tool_use_block(id: str, name: str, input: dict):
|
||||
return SimpleNamespace(type="tool_use", id=id, name=name, input=input)
|
||||
|
||||
|
||||
def _response(content_blocks, stop_reason="end_turn"):
|
||||
return SimpleNamespace(
|
||||
content=content_blocks,
|
||||
stop_reason=stop_reason,
|
||||
usage=SimpleNamespace(
|
||||
input_tokens=10,
|
||||
output_tokens=5,
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTextOnly:
    """A plain text reply: no tool calls and no thinking blocks."""

    def setup_method(self):
        # Run one mock response through both normalizers so each test can
        # compare the v1 and v2 results.
        self.resp = _response([_text_block("Hello world")])
        (self.v1_msg, self.v1_finish) = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_type(self):
        result = self.v2
        assert isinstance(result, NormalizedResponse)

    def test_content_matches(self):
        new_content = self.v2.content
        assert new_content == self.v1_msg.content

    def test_finish_reason_matches(self):
        new_reason = self.v2.finish_reason
        assert new_reason == self.v1_finish

    def test_no_tool_calls(self):
        # Both versions report "no tool calls" as None.
        assert self.v2.tool_calls is None
        assert self.v1_msg.tool_calls is None

    def test_no_reasoning(self):
        # Both versions report "no reasoning" as None.
        assert self.v2.reasoning is None
        assert self.v1_msg.reasoning is None
|
||||
|
||||
|
||||
class TestWithToolCalls:
    """A text block followed by two ``tool_use`` blocks."""

    def setup_method(self):
        blocks = [
            _text_block("I'll check that"),
            _tool_use_block("toolu_abc", "terminal", {"command": "ls"}),
            _tool_use_block("toolu_def", "read_file", {"path": "/tmp"}),
        ]
        self.resp = _response(blocks, stop_reason="tool_use")
        (self.v1_msg, self.v1_finish) = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_finish_reason(self):
        assert self.v2.finish_reason == "tool_calls"
        assert self.v1_finish == "tool_calls"

    def test_tool_call_count(self):
        assert len(self.v2.tool_calls) == 2
        assert len(self.v1_msg.tool_calls) == 2

    def test_tool_call_ids_match(self):
        for position in (0, 1):
            new_call = self.v2.tool_calls[position]
            assert new_call.id == self.v1_msg.tool_calls[position].id

    def test_tool_call_names_match(self):
        assert self.v2.tool_calls[0].name == "terminal"
        assert self.v2.tool_calls[1].name == "read_file"
        # v1 nests the name under ``.function``; v2 exposes it directly.
        for position in (0, 1):
            new_call = self.v2.tool_calls[position]
            assert new_call.name == self.v1_msg.tool_calls[position].function.name

    def test_tool_call_arguments_match(self):
        for position in (0, 1):
            new_call = self.v2.tool_calls[position]
            assert new_call.arguments == self.v1_msg.tool_calls[position].function.arguments

    def test_content_preserved(self):
        new_content = self.v2.content
        assert new_content == self.v1_msg.content
        assert "check that" in self.v2.content
|
||||
|
||||
|
||||
class TestWithThinking:
    """A response that carries a thinking block ahead of the visible text."""

    def setup_method(self):
        thinking = _thinking_block("Let me think about this carefully...")
        answer = _text_block("The answer is 42.")
        self.resp = _response([thinking, answer])
        (self.v1_msg, self.v1_finish) = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_reasoning_matches(self):
        new_reasoning = self.v2.reasoning
        assert new_reasoning == self.v1_msg.reasoning
        assert "think about this" in self.v2.reasoning

    def test_reasoning_details_in_provider_data(self):
        v1_details = self.v1_msg.reasoning_details
        # v2 keeps the details under provider_data, which may be None/empty.
        if self.v2.provider_data:
            v2_details = self.v2.provider_data.get("reasoning_details")
        else:
            v2_details = None
        assert v1_details is not None
        assert v2_details is not None
        assert len(v2_details) == len(v1_details)

    def test_content_excludes_thinking(self):
        visible = self.v2.content
        assert visible == "The answer is 42."
|
||||
|
||||
|
||||
class TestMixed:
    """One response containing thinking, text and a tool call together."""

    def setup_method(self):
        blocks = [
            _thinking_block("Planning my approach..."),
            _text_block("I'll run the command"),
            _tool_use_block("toolu_xyz", "terminal", {"command": "pwd"}),
        ]
        self.resp = _response(blocks, stop_reason="tool_use")
        (self.v1_msg, self.v1_finish) = normalize_anthropic_response(self.resp)
        self.v2 = normalize_anthropic_response_v2(self.resp)

    def test_all_fields_present(self):
        result = self.v2
        assert result.content is not None
        assert result.tool_calls is not None
        assert result.reasoning is not None
        assert result.finish_reason == "tool_calls"

    def test_content_matches(self):
        new_content = self.v2.content
        assert new_content == self.v1_msg.content

    def test_reasoning_matches(self):
        new_reasoning = self.v2.reasoning
        assert new_reasoning == self.v1_msg.reasoning

    def test_tool_call_matches(self):
        # v1 nests the name under ``.function``; v2 exposes it directly.
        assert self.v2.tool_calls[0].id == self.v1_msg.tool_calls[0].id
        assert self.v2.tool_calls[0].name == self.v1_msg.tool_calls[0].function.name
|
||||
|
||||
|
||||
class TestStopReasons:
    """Both normalizers must map each stop_reason to the same finish_reason."""

    # (Anthropic stop_reason, expected normalized finish_reason)
    _CASES = [
        ("end_turn", "stop"),
        ("tool_use", "tool_calls"),
        ("max_tokens", "length"),
        ("stop_sequence", "stop"),
        ("refusal", "content_filter"),
        ("model_context_window_exceeded", "length"),
        ("unknown_future_reason", "stop"),
    ]

    @pytest.mark.parametrize("stop_reason,expected", _CASES)
    def test_stop_reason_mapping(self, stop_reason, expected):
        resp = _response([_text_block("x")], stop_reason=stop_reason)
        _legacy_msg, legacy_finish = normalize_anthropic_response(resp)
        normalized = normalize_anthropic_response_v2(resp)
        assert normalized.finish_reason == legacy_finish
        assert legacy_finish == expected
|
||||
|
||||
|
||||
class TestStripToolPrefix:
    """``strip_tool_prefix`` must treat the ``mcp_`` prefix alike in v1 and v2."""

    @staticmethod
    def _normalize_both(strip):
        """Normalize one ``mcp_terminal`` tool call through v1 and v2."""
        resp = _response(
            [_tool_use_block("toolu_1", "mcp_terminal", {"cmd": "ls"})],
            stop_reason="tool_use",
        )
        legacy_msg, _ = normalize_anthropic_response(resp, strip_tool_prefix=strip)
        normalized = normalize_anthropic_response_v2(resp, strip_tool_prefix=strip)
        return legacy_msg, normalized

    def test_prefix_stripped(self):
        legacy_msg, normalized = self._normalize_both(True)
        assert legacy_msg.tool_calls[0].function.name == "terminal"
        assert normalized.tool_calls[0].name == "terminal"

    def test_prefix_kept(self):
        legacy_msg, normalized = self._normalize_both(False)
        assert legacy_msg.tool_calls[0].function.name == "mcp_terminal"
        assert normalized.tool_calls[0].name == "mcp_terminal"
|
||||
|
||||
|
||||
class TestEdgeCases:
    """Boundary inputs: an empty block list and responses without reasoning."""

    def test_empty_content_blocks(self):
        empty = _response([])
        legacy_msg, _legacy_finish = normalize_anthropic_response(empty)
        normalized = normalize_anthropic_response_v2(empty)
        assert normalized.content == legacy_msg.content
        assert normalized.content is None

    def test_no_reasoning_details_means_none_provider_data(self):
        normalized = normalize_anthropic_response_v2(_response([_text_block("hi")]))
        assert normalized.provider_data is None

    def test_v2_returns_dataclass_not_namespace(self):
        normalized = normalize_anthropic_response_v2(_response([_text_block("hi")]))
        assert isinstance(normalized, NormalizedResponse)
        assert not isinstance(normalized, SimpleNamespace)
|
||||
146
tests/agent/test_copilot_acp_client.py
Normal file
146
tests/agent/test_copilot_acp_client.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Focused regressions for the Copilot ACP shim safety layer."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import os
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
from unittest.mock import patch
|
||||
|
||||
from agent.copilot_acp_client import CopilotACPClient
|
||||
|
||||
|
||||
class _FakeProcess:
|
||||
def __init__(self) -> None:
|
||||
self.stdin = io.StringIO()
|
||||
|
||||
|
||||
class CopilotACPClientSafetyTests(unittest.TestCase):
    """Checks on how the client answers permission and filesystem requests."""

    def setUp(self) -> None:
        self.client = CopilotACPClient(acp_cwd="/tmp")

    @staticmethod
    def _rpc_request(request_id: int, method: str, params: dict) -> dict:
        """Assemble a JSON-RPC 2.0 request message."""
        return {
            "jsonrpc": "2.0",
            "id": request_id,
            "method": method,
            "params": params,
        }

    def _dispatch(self, message: dict, *, cwd: str) -> dict:
        """Feed one server message to the client and decode its reply.

        The reply is whatever the client wrote to the fake process's stdin.
        """
        fake = _FakeProcess()
        was_handled = self.client._handle_server_message(
            message,
            process=fake,
            cwd=cwd,
            text_parts=[],
            reasoning_parts=[],
        )
        self.assertTrue(was_handled)
        raw_reply = fake.stdin.getvalue().strip()
        self.assertTrue(raw_reply)
        return json.loads(raw_reply)

    def test_request_permission_is_not_auto_allowed(self) -> None:
        reply = self._dispatch(
            self._rpc_request(1, "session/request_permission", {}),
            cwd="/tmp",
        )

        # Missing or null levels collapse to {} so the lookup cannot raise.
        result = reply.get("result") or {}
        outcome_wrapper = result.get("outcome") or {}
        self.assertEqual(outcome_wrapper.get("outcome"), "cancelled")

    def test_read_text_file_blocks_internal_hermes_hub_files(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            blocked = home / ".hermes" / "skills" / ".hub" / "index-cache" / "entry.json"
            blocked.parent.mkdir(parents=True, exist_ok=True)
            blocked.write_text('{"token":"sk-test-secret-1234567890"}')

            env_overrides = {"HOME": str(home), "HERMES_HOME": str(home / ".hermes")}
            with patch.dict(os.environ, env_overrides, clear=False):
                reply = self._dispatch(
                    self._rpc_request(2, "fs/read_text_file", {"path": str(blocked)}),
                    cwd=str(home),
                )

            self.assertIn("error", reply)

    def test_read_text_file_redacts_sensitive_content(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            secret_file = root / "config.env"
            secret_file.write_text("OPENAI_API_KEY=sk-proj-abc123def456ghi789jkl012")

            reply = self._dispatch(
                self._rpc_request(3, "fs/read_text_file", {"path": str(secret_file)}),
                cwd=str(root),
            )

            result = reply.get("result") or {}
            content = result.get("content") or ""
            # The key name survives; the secret value must not.
            self.assertNotIn("abc123def456", content)
            self.assertIn("OPENAI_API_KEY=", content)

    def test_write_text_file_reuses_write_denylist(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            home = Path(tmpdir) / "home"
            target = home / ".ssh" / "id_rsa"
            target.parent.mkdir(parents=True, exist_ok=True)

            write_params = {"path": str(target), "content": "fake-private-key"}
            with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True):
                reply = self._dispatch(
                    self._rpc_request(4, "fs/write_text_file", write_params),
                    cwd=str(home),
                )

            self.assertIn("error", reply)
            self.assertFalse(target.exists())

    def test_write_text_file_respects_safe_root(self) -> None:
        with tempfile.TemporaryDirectory() as tmpdir:
            root = Path(tmpdir)
            safe_root = root / "workspace"
            safe_root.mkdir()
            outside = root / "outside.txt"

            write_params = {"path": str(outside), "content": "should-not-write"}
            with patch.dict(os.environ, {"HERMES_WRITE_SAFE_ROOT": str(safe_root)}, clear=False):
                reply = self._dispatch(
                    self._rpc_request(5, "fs/write_text_file", write_params),
                    cwd=str(root),
                )

            self.assertIn("error", reply)
            self.assertFalse(outside.exists())
|
||||
|
||||
|
||||
# Entry point so this module can also be run directly via the unittest runner.
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
|
@ -516,13 +516,12 @@ class TestGatewayFormatting:
|
|||
assert "**" in text # Markdown bold
|
||||
|
||||
def test_gateway_format_hides_cost(self, populated_db):
    """Gateway format omits dollar figures and internal cache details."""
    engine = InsightsEngine(populated_db)
    report = engine.generate(days=30)
    text = engine.format_gateway(report)

    # Only the "hidden" expectations are kept; the opposite assertions
    # ('"$" in text', '"Est. cost" in text') contradicted them and could
    # never pass together with these.
    assert "$" not in text
    assert "cache" not in text.lower()
|
||||
|
||||
def test_gateway_format_shows_models(self, populated_db):
|
||||
|
|
|
|||
|
|
@ -84,38 +84,6 @@ class TestMinimaxAuxModel:
|
|||
assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]
|
||||
|
||||
|
||||
class TestMinimaxModelCatalog:
    """Check MiniMax catalog plumbing and its exclusion invariants.

    Specific model names that are expected to change with each release are
    deliberately not pinned: per the repository's testing guidance, such
    snapshot assertions only break CI on routine catalog updates.

    Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api
    """

    def test_catalog_includes_current_models(self):
        """Both MiniMax providers are registered with a non-empty catalog."""
        from hermes_cli.models import _PROVIDER_MODELS
        for provider in ("minimax", "minimax-cn"):
            assert provider in _PROVIDER_MODELS
            assert len(_PROVIDER_MODELS[provider]) >= 1

    def test_catalog_excludes_m1_family(self):
        """M1 models are not available on the /anthropic endpoint."""
        from hermes_cli.models import _PROVIDER_MODELS
        for provider in ("minimax", "minimax-cn"):
            models = _PROVIDER_MODELS[provider]
            assert "MiniMax-M1" not in models

    def test_catalog_excludes_highspeed(self):
        """Highspeed variants are available but not shown in default catalog
        (users can still specify them manually)."""
        from hermes_cli.models import _PROVIDER_MODELS
        for provider in ("minimax", "minimax-cn"):
            models = _PROVIDER_MODELS[provider]
            assert "MiniMax-M2.7-highspeed" not in models
            assert "MiniMax-M2.5-highspeed" not in models
|
||||
|
||||
|
||||
class TestMinimaxBetaHeaders:
|
||||
"""MiniMax Anthropic-compat endpoints reject fine-grained-tool-streaming beta.
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@ when proxy env vars or custom endpoint URLs are malformed.
|
|||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from agent.auxiliary_client import _validate_base_url, _validate_proxy_env_urls
|
||||
|
|
@ -31,6 +33,12 @@ def test_proxy_env_accepts_empty(monkeypatch):
|
|||
_validate_proxy_env_urls() # should not raise
|
||||
|
||||
|
||||
def test_proxy_env_normalizes_socks_alias(monkeypatch):
    """A ``socks://`` ALL_PROXY value is rewritten in place to ``socks5://``."""
    monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
    _validate_proxy_env_urls()
    rewritten = os.environ["ALL_PROXY"]
    assert rewritten == "socks5://127.0.0.1:1080/"
|
||||
|
||||
|
||||
@pytest.mark.parametrize("key", [
|
||||
"HTTP_PROXY", "HTTPS_PROXY", "ALL_PROXY",
|
||||
"http_proxy", "https_proxy", "all_proxy",
|
||||
|
|
|
|||
|
|
@ -405,3 +405,191 @@ class TestPlanSkillHelpers:
|
|||
assert "Add a /plan command" in msg
|
||||
assert ".hermes/plans/plan.md" in msg
|
||||
assert "Runtime note:" in msg
|
||||
|
||||
|
||||
class TestSkillDirectoryHeader:
    """The activation message names the skill's absolute directory.

    It also tells the agent how to resolve relative paths, so a skill with
    bundled scripts does not need a second ``skill_view()`` round-trip.
    """

    def test_header_contains_absolute_skill_dir(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(tmp_path, "abs-dir-skill")
            scan_skill_commands()
            message = build_skill_invocation_message("/abs-dir-skill", "go")

        assert message is not None
        expected_header = f"[Skill directory: {skill_dir}]"
        assert expected_header in message
        assert "Resolve any relative paths" in message

    def test_supporting_files_shown_with_absolute_paths(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(tmp_path, "scripted-skill")
            scripts_dir = skill_dir / "scripts"
            scripts_dir.mkdir()
            (scripts_dir / "run.js").write_text("console.log('hi')")
            scan_skill_commands()
            message = build_skill_invocation_message("/scripted-skill")

        assert message is not None
        # Supporting files appear in relative form (usable with skill_view)
        # and in absolute form (runnable directly from a terminal).
        assert "scripts/run.js" in message
        assert str(skill_dir / "scripts" / "run.js") in message
        assert f"node {skill_dir}/scripts/foo.js" in message
|
||||
|
||||
|
||||
class TestTemplateVarSubstitution:
    """``${HERMES_SKILL_DIR}`` / ``${HERMES_SESSION_ID}`` in a SKILL.md body
    are replaced before the content reaches the agent."""

    def test_substitutes_skill_dir(self, tmp_path):
        skill_body = "Run: node ${HERMES_SKILL_DIR}/scripts/foo.js"
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            skill_dir = _make_skill(tmp_path, "templated", body=skill_body)
            scan_skill_commands()
            message = build_skill_invocation_message("/templated")

        assert message is not None
        assert f"node {skill_dir}/scripts/foo.js" in message
        # Only the text before the "[Skill directory:" marker is checked
        # for a leaked literal token.
        before_marker = message.split("[Skill directory:")[0]
        assert "${HERMES_SKILL_DIR}" not in before_marker

    def test_substitutes_session_id_when_available(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(tmp_path, "sess-templated", body="Session: ${HERMES_SESSION_ID}")
            scan_skill_commands()
            message = build_skill_invocation_message("/sess-templated", task_id="abc-123")

        assert message is not None
        assert "Session: abc-123" in message

    def test_leaves_session_id_token_when_missing(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(tmp_path, "sess-missing", body="Session: ${HERMES_SESSION_ID}")
            scan_skill_commands()
            message = build_skill_invocation_message("/sess-missing", task_id=None)

        assert message is not None
        # Without a session the token stays visible so the author can spot it.
        assert "Session: ${HERMES_SESSION_ID}" in message

    def test_disable_template_vars_via_config(self, tmp_path):
        skill_body = "Run: node ${HERMES_SKILL_DIR}/scripts/foo.js"
        substitution_off = {"template_vars": False}
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            with patch(
                "agent.skill_commands._load_skills_config",
                return_value=substitution_off,
            ):
                _make_skill(tmp_path, "no-sub", body=skill_body)
                scan_skill_commands()
                message = build_skill_invocation_message("/no-sub")

        assert message is not None
        # With substitution disabled the token must pass through untouched.
        assert "${HERMES_SKILL_DIR}/scripts/foo.js" in message
|
||||
|
||||
|
||||
class TestInlineShellExpansion:
    """Inline shell snippets in SKILL.md are expanded before the agent sees
    the content, but only when the skills config opts in."""

    @staticmethod
    def _inline_shell_enabled(timeout):
        """Patch the skills config to enable inline shell with ``timeout``."""
        return patch(
            "agent.skill_commands._load_skills_config",
            return_value={"template_vars": True, "inline_shell": True,
                          "inline_shell_timeout": timeout},
        )

    def test_inline_shell_is_off_by_default(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            _make_skill(tmp_path, "dyn-default-off", body="Today is !`echo INLINE_RAN`.")
            scan_skill_commands()
            message = build_skill_invocation_message("/dyn-default-off")

        assert message is not None
        # No config override here: the snippet must stay literal.
        assert "!`echo INLINE_RAN`" in message
        assert "Today is INLINE_RAN." not in message

    def test_inline_shell_runs_when_enabled(self, tmp_path):
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            with self._inline_shell_enabled(5):
                _make_skill(tmp_path, "dyn-on", body="Marker: !`echo INLINE_RAN`.")
                scan_skill_commands()
                message = build_skill_invocation_message("/dyn-on")

        assert message is not None
        assert "Marker: INLINE_RAN." in message
        assert "!`echo INLINE_RAN`" not in message

    def test_inline_shell_runs_in_skill_directory(self, tmp_path):
        """Inline snippets get the skill dir as CWD so relative paths work."""
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            with self._inline_shell_enabled(5):
                skill_dir = _make_skill(tmp_path, "dyn-cwd", body="Here: !`pwd`")
                scan_skill_commands()
                message = build_skill_invocation_message("/dyn-cwd")

        assert message is not None
        assert f"Here: {skill_dir}" in message

    def test_inline_shell_timeout_does_not_break_message(self, tmp_path):
        slow_body = "Slow: !`sleep 5 && printf DYN_MARKER`"
        with patch("tools.skills_tool.SKILLS_DIR", tmp_path):
            with self._inline_shell_enabled(1):
                _make_skill(tmp_path, "dyn-slow", body=slow_body)
                scan_skill_commands()
                message = build_skill_invocation_message("/dyn-slow")

        assert message is not None
        # The timeout shows up as a marker in the message, not as an error.
        assert "inline-shell timeout" in message
        # Ignore the echoed command text; the command's own output must
        # not be present.
        without_command = message.replace("sleep 5 && printf DYN_MARKER", "")
        assert "DYN_MARKER" not in without_command
|
||||
|
|
|
|||
0
tests/agent/transports/__init__.py
Normal file
0
tests/agent/transports/__init__.py
Normal file
220
tests/agent/transports/test_transport.py
Normal file
220
tests/agent/transports/test_transport.py
Normal file
|
|
@ -0,0 +1,220 @@
|
|||
"""Tests for the transport ABC, registry, and AnthropicTransport."""
|
||||
|
||||
import pytest
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from agent.transports.base import ProviderTransport
|
||||
from agent.transports.types import NormalizedResponse, ToolCall, Usage
|
||||
from agent.transports import get_transport, register_transport, _REGISTRY
|
||||
|
||||
|
||||
# ── ABC contract tests ──────────────────────────────────────────────────
|
||||
|
||||
class TestProviderTransportABC:
    """The abstract base must reject incomplete subclasses and supply defaults."""

    def test_cannot_instantiate_abc(self):
        with pytest.raises(TypeError):
            ProviderTransport()

    def test_concrete_must_implement_all_abstract(self):
        # Only api_mode is implemented, so instantiation must still fail.
        class Incomplete(ProviderTransport):
            @property
            def api_mode(self):
                return "test"

        with pytest.raises(TypeError):
            Incomplete()

    def test_minimal_concrete(self):
        class Minimal(ProviderTransport):
            @property
            def api_mode(self):
                return "test_minimal"

            def convert_messages(self, messages, **kw):
                return messages

            def convert_tools(self, tools):
                return tools

            def build_kwargs(self, model, messages, tools=None, **params):
                return {"model": model, "messages": messages}

            def normalize_response(self, response, **kw):
                return NormalizedResponse(content="ok", tool_calls=None, finish_reason="stop")

        instance = Minimal()
        assert instance.api_mode == "test_minimal"
        # The remaining methods come from the base class defaults.
        assert instance.validate_response(None) is True
        assert instance.extract_cache_stats(None) is None
        assert instance.map_finish_reason("end_turn") == "end_turn"  # passthrough
|
||||
|
||||
|
||||
# ── Registry tests ───────────────────────────────────────────────────────
|
||||
|
||||
class TestTransportRegistry:
    """Lookup and registration behaviour of the transport registry."""

    def test_get_unregistered_returns_none(self):
        assert get_transport("nonexistent_mode") is None

    def test_anthropic_registered_on_import(self):
        import agent.transports.anthropic  # noqa: F401
        t = get_transport("anthropic_messages")
        assert t is not None
        assert t.api_mode == "anthropic_messages"

    def test_register_and_get(self):
        class DummyTransport(ProviderTransport):
            @property
            def api_mode(self):
                return "dummy_test"

            def convert_messages(self, messages, **kw):
                return messages

            def convert_tools(self, tools):
                return tools

            def build_kwargs(self, model, messages, tools=None, **params):
                return {}

            def normalize_response(self, response, **kw):
                return NormalizedResponse(content=None, tool_calls=None, finish_reason="stop")

        register_transport("dummy_test", DummyTransport)
        try:
            t = get_transport("dummy_test")
            assert t.api_mode == "dummy_test"
        finally:
            # Always remove the dummy entry, even when an assertion fails,
            # so the shared registry does not leak into other tests.
            _REGISTRY.pop("dummy_test", None)
|
||||
|
||||
|
||||
# ── AnthropicTransport tests ────────────────────────────────────────────
|
||||
|
||||
class TestAnthropicTransport:
    """Checks for the transport registered under ``anthropic_messages``."""

    @pytest.fixture
    def transport(self):
        """Import the anthropic transport module, then fetch the transport
        from the registry by its api_mode key."""
        import agent.transports.anthropic  # noqa: F401

        return get_transport("anthropic_messages")

    def test_api_mode(self, transport):
        """The transport reports the key it was looked up under."""
        assert transport.api_mode == "anthropic_messages"

    def test_convert_tools_simple(self, transport):
        """One function-style tool converts to one entry carrying a name
        and an ``input_schema`` key."""
        function_spec = {
            "name": "test_tool",
            "description": "A test",
            "parameters": {"type": "object", "properties": {}},
        }
        converted = transport.convert_tools(
            [{"type": "function", "function": function_spec}]
        )
        assert len(converted) == 1
        assert converted[0]["name"] == "test_tool"
        assert "input_schema" in converted[0]

    def test_validate_response_none(self, transport):
        """A missing response is rejected."""
        verdict = transport.validate_response(None)
        assert verdict is False

    def test_validate_response_empty_content(self, transport):
        """A response with an empty content list is rejected."""
        empty = SimpleNamespace(content=[])
        assert transport.validate_response(empty) is False

    def test_validate_response_valid(self, transport):
        """A response with one text block is accepted."""
        text_block = SimpleNamespace(type="text", text="hello")
        populated = SimpleNamespace(content=[text_block])
        assert transport.validate_response(populated) is True

    def test_map_finish_reason(self, transport):
        """Each stop reason below maps to the listed finish reason; the
        unrecognised value ``"unknown"`` maps to ``"stop"``."""
        expected_by_reason = {
            "end_turn": "stop",
            "tool_use": "tool_calls",
            "max_tokens": "length",
            "stop_sequence": "stop",
            "refusal": "content_filter",
            "model_context_window_exceeded": "length",
            "unknown": "stop",
        }
        for raw_reason, mapped in expected_by_reason.items():
            assert transport.map_finish_reason(raw_reason) == mapped

    def test_extract_cache_stats_none_usage(self, transport):
        """No usage object means no cache stats."""
        response = SimpleNamespace(usage=None)
        assert transport.extract_cache_stats(response) is None

    def test_extract_cache_stats_with_cache(self, transport):
        """Non-zero cache counters are reported under the normalized keys."""
        counters = SimpleNamespace(
            cache_read_input_tokens=100,
            cache_creation_input_tokens=50,
        )
        stats = transport.extract_cache_stats(SimpleNamespace(usage=counters))
        assert stats == {"cached_tokens": 100, "creation_tokens": 50}

    def test_extract_cache_stats_zero(self, transport):
        """All-zero cache counters yield None."""
        counters = SimpleNamespace(
            cache_read_input_tokens=0,
            cache_creation_input_tokens=0,
        )
        response = SimpleNamespace(usage=counters)
        assert transport.extract_cache_stats(response) is None

    def test_normalize_response_text(self, transport):
        """A single text block normalizes to plain content with finish
        reason ``stop`` and no tool calls."""
        raw = SimpleNamespace(
            content=[SimpleNamespace(type="text", text="Hello world")],
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=5),
            model="claude-sonnet-4-6",
        )
        normalized = transport.normalize_response(raw)
        assert isinstance(normalized, NormalizedResponse)
        assert normalized.content == "Hello world"
        assert normalized.tool_calls is None or normalized.tool_calls == []
        assert normalized.finish_reason == "stop"

    def test_normalize_response_tool_calls(self, transport):
        """A tool_use block normalizes to one tool call that keeps its id
        and name, with the input present in the arguments."""
        tool_block = SimpleNamespace(
            type="tool_use",
            id="toolu_123",
            name="terminal",
            input={"command": "ls"},
        )
        raw = SimpleNamespace(
            content=[tool_block],
            stop_reason="tool_use",
            usage=SimpleNamespace(input_tokens=10, output_tokens=20),
            model="claude-sonnet-4-6",
        )
        normalized = transport.normalize_response(raw)
        assert normalized.finish_reason == "tool_calls"
        assert len(normalized.tool_calls) == 1
        call = normalized.tool_calls[0]
        assert call.name == "terminal"
        assert call.id == "toolu_123"
        assert '"command"' in call.arguments

    def test_normalize_response_thinking(self, transport):
        """Thinking text lands in ``reasoning``; the text block stays the content."""
        blocks = [
            SimpleNamespace(type="thinking", thinking="Let me think..."),
            SimpleNamespace(type="text", text="The answer is 42"),
        ]
        raw = SimpleNamespace(
            content=blocks,
            stop_reason="end_turn",
            usage=SimpleNamespace(input_tokens=10, output_tokens=15),
            model="claude-sonnet-4-6",
        )
        normalized = transport.normalize_response(raw)
        assert normalized.content == "The answer is 42"
        assert normalized.reasoning == "Let me think..."

    def test_build_kwargs_returns_dict(self, transport):
        """build_kwargs returns a dict containing model, max_tokens and messages."""
        request = transport.build_kwargs(
            model="claude-sonnet-4-6",
            messages=[{"role": "user", "content": "Hello"}],
            max_tokens=1024,
        )
        assert isinstance(request, dict)
        for required_key in ("model", "max_tokens", "messages"):
            assert required_key in request

    def test_convert_messages_extracts_system(self, transport):
        """convert_messages returns a (system, messages) pair."""
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hi"},
        ]
        system, msgs = transport.convert_messages(conversation)
        # The system prompt must come back as a separate, non-None value.
        assert system is not None
        # At least one message must remain after conversion.
        assert len(msgs) >= 1
|
||||
151
tests/agent/transports/test_types.py
Normal file
151
tests/agent/transports/test_types.py
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
"""Tests for agent/transports/types.py — dataclass construction + helpers."""
|
||||
|
||||
import json
|
||||
import pytest
|
||||
|
||||
from agent.transports.types import (
|
||||
NormalizedResponse,
|
||||
ToolCall,
|
||||
Usage,
|
||||
build_tool_call,
|
||||
map_finish_reason,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# ToolCall
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestToolCall:
    """Construction of ToolCall records."""

    def test_basic_construction(self):
        """Fields are stored as given; provider_data is None when omitted."""
        call = ToolCall(id="call_abc", name="terminal", arguments='{"cmd": "ls"}')
        assert call.id == "call_abc"
        assert call.name == "terminal"
        assert call.arguments == '{"cmd": "ls"}'
        assert call.provider_data is None

    def test_none_id(self):
        """A ToolCall may be built with id=None."""
        call = ToolCall(id=None, name="read_file", arguments="{}")
        assert call.id is None

    def test_provider_data(self):
        """provider_data passed at construction can be read back by key."""
        extras = {"call_id": "call_x", "response_item_id": "fc_x"}
        call = ToolCall(id="call_x", name="t", arguments="{}", provider_data=extras)
        assert call.provider_data["call_id"] == "call_x"
        assert call.provider_data["response_item_id"] == "fc_x"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Usage
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUsage:
    """Defaults and explicit values of the Usage record."""

    def test_defaults(self):
        """Every token counter is 0 on a bare Usage()."""
        usage = Usage()
        for counter in (
            "prompt_tokens",
            "completion_tokens",
            "total_tokens",
            "cached_tokens",
        ):
            assert getattr(usage, counter) == 0

    def test_explicit(self):
        """An explicitly supplied total_tokens is stored unchanged."""
        usage = Usage(
            prompt_tokens=100,
            completion_tokens=50,
            total_tokens=150,
            cached_tokens=80,
        )
        assert usage.total_tokens == 150
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# NormalizedResponse
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestNormalizedResponse:
    """Construction of NormalizedResponse with and without optional fields."""

    def test_text_only(self):
        """Only the required fields are set; the optional ones are None."""
        resp = NormalizedResponse(content="hello", tool_calls=None, finish_reason="stop")
        assert resp.content == "hello"
        assert resp.tool_calls is None
        assert resp.finish_reason == "stop"
        for optional_field in ("reasoning", "usage", "provider_data"):
            assert getattr(resp, optional_field) is None

    def test_with_tool_calls(self):
        """A list of ToolCall objects is kept on the response."""
        calls = [ToolCall(id="call_1", name="terminal", arguments='{"cmd":"pwd"}')]
        resp = NormalizedResponse(content=None, tool_calls=calls, finish_reason="tool_calls")
        assert resp.finish_reason == "tool_calls"
        assert len(resp.tool_calls) == 1
        assert resp.tool_calls[0].name == "terminal"

    def test_with_reasoning(self):
        """The reasoning keyword is stored as given."""
        resp = NormalizedResponse(
            content="answer",
            tool_calls=None,
            finish_reason="stop",
            reasoning="I thought about it",
        )
        assert resp.reasoning == "I thought about it"

    def test_with_provider_data(self):
        """Nested provider_data can be read back."""
        details = [{"type": "thinking", "thinking": "hmm"}]
        resp = NormalizedResponse(
            content=None,
            tool_calls=None,
            finish_reason="stop",
            provider_data={"reasoning_details": details},
        )
        assert resp.provider_data["reasoning_details"][0]["type"] == "thinking"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# build_tool_call
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestBuildToolCall:
    """Behaviour of the build_tool_call helper."""

    def test_dict_arguments_serialized(self):
        """Dict arguments come back as the json.dumps string of that dict."""
        payload = {"cmd": "ls"}
        call = build_tool_call(id="call_1", name="terminal", arguments={"cmd": "ls"})
        assert call.arguments == json.dumps(payload)
        assert call.provider_data is None

    def test_string_arguments_passthrough(self):
        """String arguments are stored unchanged."""
        raw_args = '{"path": "/tmp"}'
        call = build_tool_call(id="call_2", name="read_file", arguments=raw_args)
        assert call.arguments == '{"path": "/tmp"}'

    def test_provider_fields(self):
        """Extra keyword arguments are collected into provider_data."""
        call = build_tool_call(
            id="call_3",
            name="terminal",
            arguments="{}",
            call_id="call_3",
            response_item_id="fc_3",
        )
        expected_extras = {"call_id": "call_3", "response_item_id": "fc_3"}
        assert call.provider_data == expected_extras

    def test_none_id(self):
        """id=None is accepted and preserved."""
        call = build_tool_call(id=None, name="t", arguments="{}")
        assert call.id is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# map_finish_reason
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestMapFinishReason:
    """map_finish_reason(reason, mapping) translates provider stop reasons."""

    # Sample provider-to-normalized mapping used by every test in this class.
    ANTHROPIC_MAP = {
        "end_turn": "stop",
        "tool_use": "tool_calls",
        "max_tokens": "length",
        "stop_sequence": "stop",
        "refusal": "content_filter",
    }

    def test_known_reason(self):
        """Every key in the mapping translates to its mapped value.

        Iterating the whole map, rather than a hand-picked subset, keeps
        entries such as ``stop_sequence`` from going unchecked.
        """
        for raw_reason, expected in self.ANTHROPIC_MAP.items():
            assert map_finish_reason(raw_reason, self.ANTHROPIC_MAP) == expected, raw_reason

    def test_unknown_reason_defaults_to_stop(self):
        """A reason absent from the mapping falls back to ``stop``."""
        assert map_finish_reason("something_new", self.ANTHROPIC_MAP) == "stop"

    def test_none_reason(self):
        """A None reason also falls back to ``stop``."""
        assert map_finish_reason(None, self.ANTHROPIC_MAP) == "stop"
|
||||
146
tests/cli/test_cli_steer_busy_path.py
Normal file
146
tests/cli/test_cli_steer_busy_path.py
Normal file
|
|
@ -0,0 +1,146 @@
|
|||
"""Regression tests for classic-CLI mid-run /steer dispatch.
|
||||
|
||||
Background
|
||||
----------
|
||||
/steer sent while the agent is running used to be queued through
|
||||
``self._pending_input`` alongside ordinary user input. ``process_loop``
|
||||
pulls from that queue and calls ``process_command()`` — but while the
|
||||
agent is running, ``process_loop`` is blocked inside ``self.chat()``.
|
||||
By the time the queued /steer was pulled, ``_agent_running`` had
|
||||
already flipped back to False, so ``process_command()`` took the idle
|
||||
fallback (``"No agent running; queued as next turn"``) and delivered
|
||||
the steer as an ordinary next-turn message.
|
||||
|
||||
The fix dispatches /steer inline on the UI thread when the agent is
|
||||
running — matching the existing pattern for /model — so the steer
|
||||
reaches ``agent.steer()`` (thread-safe) without touching the queue.
|
||||
|
||||
These tests exercise the detector + inline dispatch without starting a
|
||||
prompt_toolkit app.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import importlib
|
||||
import sys
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
|
||||
def _make_cli():
    """Build a HermesCLI with prompt_toolkit replaced by MagicMock stubs.

    The ``cli`` module is imported and reloaded while ``sys.modules`` holds
    the stubs and the two environment overrides are in place. HermesCLI is
    then constructed with ``get_tool_definitions`` patched to return an
    empty list and ``CLI_CONFIG`` swapped for a minimal config dict.
    """
    stubbed_module_names = (
        "prompt_toolkit",
        "prompt_toolkit.history",
        "prompt_toolkit.styles",
        "prompt_toolkit.patch_stdout",
        "prompt_toolkit.application",
        "prompt_toolkit.layout",
        "prompt_toolkit.layout.processors",
        "prompt_toolkit.filters",
        "prompt_toolkit.layout.dimension",
        "prompt_toolkit.layout.menus",
        "prompt_toolkit.widgets",
        "prompt_toolkit.key_binding",
        "prompt_toolkit.completion",
        "prompt_toolkit.formatted_text",
        "prompt_toolkit.auto_suggest",
    )
    # One independent MagicMock per stubbed module name.
    prompt_toolkit_stubs = {name: MagicMock() for name in stubbed_module_names}

    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}

    model_section = {
        "default": "anthropic/claude-opus-4.6",
        "base_url": "https://openrouter.ai/api/v1",
        "provider": "auto",
    }
    _clean_config = {
        "model": model_section,
        "display": {"compact": False, "tool_progress": "all"},
        "agent": {},
        "terminal": {"env_type": "local"},
    }

    modules_patch = patch.dict(sys.modules, prompt_toolkit_stubs)
    environ_patch = patch.dict("os.environ", clean_env, clear=False)
    with modules_patch, environ_patch:
        import cli as _cli_mod

        # Reload so module-level code runs again under the patches above.
        _cli_mod = importlib.reload(_cli_mod)

        tool_defs_patch = patch.object(_cli_mod, "get_tool_definitions", return_value=[])
        config_patch = patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config})
        with tool_defs_patch, config_patch:
            return _cli_mod.HermesCLI()
|
||||
|
||||
|
||||
class TestSteerInlineDetector:
    """_should_handle_steer_command_inline decides whether /steer takes the
    busy-path inline dispatch."""

    @staticmethod
    def _cli(agent_running):
        """Return a stubbed CLI with ``_agent_running`` preset."""
        instance = _make_cli()
        instance._agent_running = agent_running
        return instance

    def test_detects_steer_when_agent_running(self):
        """/steer with a running agent is handled inline."""
        cli = self._cli(True)
        verdict = cli._should_handle_steer_command_inline("/steer focus on error handling")
        assert verdict is True

    def test_ignores_steer_when_agent_idle(self):
        """With no agent running, /steer is not handled inline and falls
        through to the normal dispatch path."""
        cli = self._cli(False)
        verdict = cli._should_handle_steer_command_inline("/steer do something")
        assert verdict is False

    def test_ignores_non_slash_input(self):
        """Plain text and empty input are never treated as /steer."""
        cli = self._cli(True)
        for text in ("steer without slash", ""):
            assert cli._should_handle_steer_command_inline(text) is False

    def test_ignores_other_slash_commands(self):
        """Slash commands other than /steer are not matched."""
        cli = self._cli(True)
        for command in ("/queue hello", "/stop", "/help"):
            assert cli._should_handle_steer_command_inline(command) is False

    def test_ignores_steer_with_attached_images(self):
        """/steer with has_images=True is not handled inline."""
        cli = self._cli(True)
        verdict = cli._should_handle_steer_command_inline("/steer text", has_images=True)
        assert verdict is False
|
||||
|
||||
|
||||
class TestSteerBusyPathDispatch:
    """process_command('/steer ...') must call agent.steer() while the agent
    is running, and use the pending-input queue when it is idle."""

    @staticmethod
    def _cli_with_mock_agent(agent_running):
        """Return a stubbed CLI with a mocked agent and a mocked pending-input
        queue, so either dispatch route can be observed."""
        instance = _make_cli()
        instance._agent_running = agent_running
        instance.agent = MagicMock()
        instance.agent.steer = MagicMock(return_value=True)
        instance._pending_input = MagicMock()
        return instance

    def test_process_command_routes_to_agent_steer(self):
        """With the agent running, the payload goes to agent.steer() and
        nothing is put on _pending_input."""
        cli = self._cli_with_mock_agent(True)

        cli.process_command("/steer focus on errors")

        cli.agent.steer.assert_called_once_with("focus on errors")
        cli._pending_input.put.assert_not_called()

    def test_idle_path_queues_as_next_turn(self):
        """Control case: with the agent idle, agent.steer() is not called and
        the payload is queued as an ordinary next-turn message."""
        cli = self._cli_with_mock_agent(False)

        cli.process_command("/steer would-be-next-turn")

        # The idle route never reaches the agent ...
        cli.agent.steer.assert_not_called()
        # ... and the bare payload is put on the queue instead.
        cli._pending_input.put.assert_called_once_with("would-be-next-turn")
|
||||
|
||||
|
||||
if __name__ == "__main__":  # pragma: no cover
    import pytest

    # pytest.main() returns the run's exit status instead of exiting.
    # Raise it as SystemExit so running this file directly reports failures
    # through the process exit code rather than always exiting 0.
    raise SystemExit(pytest.main([__file__, "-v"]))
|
||||
|
|
@ -186,6 +186,31 @@ _HERMES_BEHAVIORAL_VARS = frozenset({
|
|||
"HERMES_HOME_MODE",
|
||||
"BROWSER_CDP_URL",
|
||||
"CAMOFOX_URL",
|
||||
# Platform allowlists — not credentials, but if set from any source
|
||||
# (user shell, earlier leaky test, CI env), they change gateway auth
|
||||
# behavior and flake button-authorization tests.
|
||||
"TELEGRAM_ALLOWED_USERS",
|
||||
"DISCORD_ALLOWED_USERS",
|
||||
"WHATSAPP_ALLOWED_USERS",
|
||||
"SLACK_ALLOWED_USERS",
|
||||
"SIGNAL_ALLOWED_USERS",
|
||||
"SIGNAL_GROUP_ALLOWED_USERS",
|
||||
"EMAIL_ALLOWED_USERS",
|
||||
"SMS_ALLOWED_USERS",
|
||||
"MATTERMOST_ALLOWED_USERS",
|
||||
"MATRIX_ALLOWED_USERS",
|
||||
"DINGTALK_ALLOWED_USERS",
|
||||
"FEISHU_ALLOWED_USERS",
|
||||
"WECOM_ALLOWED_USERS",
|
||||
"GATEWAY_ALLOWED_USERS",
|
||||
"GATEWAY_ALLOW_ALL_USERS",
|
||||
"TELEGRAM_ALLOW_ALL_USERS",
|
||||
"DISCORD_ALLOW_ALL_USERS",
|
||||
"WHATSAPP_ALLOW_ALL_USERS",
|
||||
"SLACK_ALLOW_ALL_USERS",
|
||||
"SIGNAL_ALLOW_ALL_USERS",
|
||||
"EMAIL_ALLOW_ALL_USERS",
|
||||
"SMS_ALLOW_ALL_USERS",
|
||||
})
|
||||
|
||||
|
||||
|
|
@ -258,6 +283,107 @@ def _isolate_hermes_home(_hermetic_environment):
|
|||
return None
|
||||
|
||||
|
||||
# ── Module-level state reset ───────────────────────────────────────────────
|
||||
#
|
||||
# Python modules are singletons per process, and pytest-xdist workers are
|
||||
# long-lived. Module-level dicts/sets (tool registries, approval state,
|
||||
# interrupt flags) and ContextVars persist across tests in the same worker,
|
||||
# causing tests that pass alone to fail when run with siblings.
|
||||
#
|
||||
# Each entry in this fixture clears state that belongs to a specific module.
|
||||
# New state buckets go here too — this is the single gate that prevents
|
||||
# "works alone, flakes in CI" bugs from state leakage.
|
||||
#
|
||||
# The skill `test-suite-cascade-diagnosis` documents the concrete patterns
|
||||
# this closes; the running example was `test_command_guards` failing 12/15
|
||||
# CI runs because ``tools.approval._session_approved`` carried approvals
|
||||
# from one test's session into another's.
|
||||
|
||||
@pytest.fixture(autouse=True)
def _reset_module_state():
    """Reset module-level mutable state and ContextVars before each test.

    Each section below handles one module and is wrapped in its own
    ``try``/``except Exception``. If a module cannot be imported, or any
    step in a section raises, the rest of that section is skipped silently
    and the following sections still run. All work happens before the
    ``yield``; nothing is done on teardown.
    """
    # --- tools.approval: approval containers plus the session-key ContextVar ---
    try:
        from tools import approval as _approval_mod

        for _container_name in (
            "_session_approved",
            "_session_yolo",
            "_permanent_approved",
            "_pending",
            "_gateway_queues",
            "_gateway_notify_cbs",
        ):
            getattr(_approval_mod, _container_name).clear()
        # Empty string lets get_current_session_key() fall through to the
        # env var / default path, as in a fresh process.
        _approval_mod._approval_session_key.set("")
    except Exception:
        pass

    # --- tools.interrupt: set of interrupted threads, cleared under its lock ---
    try:
        from tools import interrupt as _interrupt_mod

        with _interrupt_mod._lock:
            _interrupt_mod._interrupted_threads.clear()
    except Exception:
        pass

    # --- gateway.session_context: ContextVars describing the active gateway
    # session and cron auto-delivery target. A value left set by one test
    # would otherwise be read as stale state by the next.
    try:
        from gateway import session_context as _sc_mod

        # Resolve every variable first so a missing attribute aborts the
        # section before any variable has been modified.
        _session_vars = [
            getattr(_sc_mod, _var_name)
            for _var_name in (
                "_SESSION_PLATFORM",
                "_SESSION_CHAT_ID",
                "_SESSION_CHAT_NAME",
                "_SESSION_THREAD_ID",
                "_SESSION_USER_ID",
                "_SESSION_USER_NAME",
                "_SESSION_KEY",
                "_CRON_AUTO_DELIVER_PLATFORM",
                "_CRON_AUTO_DELIVER_CHAT_ID",
                "_CRON_AUTO_DELIVER_THREAD_ID",
            )
        ]
        for _cv in _session_vars:
            _cv.set(_sc_mod._UNSET)
    except Exception:
        pass

    # --- tools.env_passthrough: ContextVar holding a set of names ---
    # Set to an empty set unconditionally so every test starts the same way,
    # whether or not an earlier test assigned a value.
    try:
        from tools import env_passthrough as _envp_mod

        _envp_mod._allowed_env_vars_var.set(set())
    except Exception:
        pass

    # --- tools.credential_files: ContextVar holding a dict ---
    try:
        from tools import credential_files as _credf_mod

        _credf_mod._registered_files_var.set({})
    except Exception:
        pass

    # --- tools.file_tools: read history and file-ops cache ---
    # Entries left in _read_tracker by an earlier test would count toward the
    # read-history cap and make capacity-related tests flaky.
    try:
        from tools import file_tools as _ft_mod

        with _ft_mod._read_tracker_lock:
            _ft_mod._read_tracker.clear()
        with _ft_mod._file_ops_lock:
            _ft_mod._file_ops_cache.clear()
    except Exception:
        pass

    yield
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def tmp_dir(tmp_path):
|
||||
"""Provide a temporary directory that is cleaned up automatically."""
|
||||
|
|
|
|||
|
|
@ -1580,3 +1580,128 @@ class TestParallelTick:
|
|||
end_s1 = [t for action, jid, t in call_times if action == "end" and jid == "s1"][0]
|
||||
start_s2 = [t for action, jid, t in call_times if action == "start" and jid == "s2"][0]
|
||||
assert start_s2 >= end_s1, "Jobs ran concurrently despite max_parallel=1"
|
||||
|
||||
|
||||
class TestDeliverResultTimeoutCancelsFuture:
    """When waiting on the live-adapter future raises TimeoutError,
    _deliver_result must cancel that future so the orphaned send cannot
    duplicate the message after the standalone fallback runs.
    """

    def test_live_adapter_timeout_cancels_future_and_falls_back(self):
        """Simulate the timeout with a Future whose ``result()`` raises
        TimeoutError. Check that ``cancel()`` is called exactly once and that
        the standalone send path still delivers."""
        from gateway.config import Platform
        from concurrent.futures import Future

        # Live adapter stand-in; its send() coroutine is never actually run
        # because fake_run_coro below closes it.
        live_adapter = AsyncMock()
        live_adapter.send.return_value = MagicMock(success=True)

        platform_cfg = MagicMock()
        platform_cfg.enabled = True
        gateway_cfg = MagicMock()
        gateway_cfg.platforms = {Platform.TELEGRAM: platform_cfg}

        running_loop = MagicMock()
        running_loop.is_running.return_value = True

        # A real Future keeps genuine cancel() semantics. Only result() is
        # overridden, to raise the same TimeoutError a timed wait would.
        hung_future = Future()
        real_cancel = hung_future.cancel
        cancel_log = []

        def recording_cancel():
            cancel_log.append(True)
            return real_cancel()

        hung_future.cancel = recording_cancel
        hung_future.result = MagicMock(side_effect=TimeoutError("timed out"))

        def fake_run_coro(coro, _loop):
            # Close the coroutine so it is never left un-awaited.
            coro.close()
            return hung_future

        fallback_send = AsyncMock(return_value={"success": True})
        cron_job = {
            "id": "timeout-job",
            "deliver": "origin",
            "origin": {"platform": "telegram", "chat_id": "123"},
        }

        with patch("gateway.config.load_gateway_config", return_value=gateway_cfg), \
             patch("cron.scheduler.load_config", return_value={"cron": {"wrap_response": False}}), \
             patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro), \
             patch("tools.send_message_tool._send_to_platform", new=fallback_send):
            outcome = _deliver_result(
                cron_job,
                "Hello world",
                adapters={Platform.TELEGRAM: live_adapter},
                loop=running_loop,
            )

        # 1. The orphaned future was cancelled when the wait timed out.
        assert cancel_log == [True], "future.cancel() must fire on TimeoutError"
        # 2. The standalone fallback delivered exactly once and no error came back.
        assert outcome is None, f"expected successful delivery, got error: {outcome!r}"
        fallback_send.assert_awaited_once()
|
||||
|
||||
|
||||
class TestSendMediaTimeoutCancelsFuture:
    """Same orphan-coroutine guarantee for _send_media_via_adapter: a future
    that times out mid-batch must be cancelled before the next file is tried.
    """

    def test_media_send_timeout_cancels_future_and_continues(self):
        """The first file's future raises TimeoutError from ``result()``; the
        second resolves. ``cancel()`` must fire once on the first, and the
        second file must still be dispatched."""
        from concurrent.futures import Future

        media_adapter = MagicMock()
        media_adapter.send_image_file = AsyncMock()
        media_adapter.send_video = AsyncMock()

        # Future for the first file: result() times out, cancel() is recorded.
        stalled_future = Future()
        real_cancel = stalled_future.cancel
        cancel_log = []

        def recording_cancel():
            cancel_log.append(True)
            return real_cancel()

        stalled_future.cancel = recording_cancel
        stalled_future.result = MagicMock(side_effect=TimeoutError("timed out"))

        # Future for the second file: already resolved successfully.
        done_future = Future()
        done_future.set_result(MagicMock(success=True))

        pending_futures = iter([stalled_future, done_future])

        def fake_run_coro(coro, _loop):
            # Close the coroutine, then hand back the next prepared future.
            coro.close()
            return next(pending_futures)

        batch = [
            ("/tmp/slow.png", False),  # times out
            ("/tmp/fast.mp4", False),  # succeeds
        ]
        fake_loop = MagicMock()
        cron_job = {"id": "media-timeout"}

        with patch("asyncio.run_coroutine_threadsafe", side_effect=fake_run_coro):
            # The timeout must be handled inside the call, not raised to the test.
            _send_media_via_adapter(media_adapter, "chat-1", batch, None, fake_loop, cron_job)

        # 1. The timed-out future was cancelled.
        assert cancel_log == [True], "future.cancel() must fire on TimeoutError"
        # 2. The second file was still sent; one timeout does not abort the batch.
        media_adapter.send_video.assert_called_once()
        video_kwargs = media_adapter.send_video.call_args.kwargs
        assert video_kwargs["video_path"] == "/tmp/fast.mp4"
|
||||
|
|
|
|||
|
|
@ -355,8 +355,17 @@ async def test_none_user_id_does_not_generate_pairing_code(monkeypatch, tmp_path
|
|||
async def test_non_internal_event_without_user_triggers_pairing(monkeypatch, tmp_path):
|
||||
"""Verify the normal (non-internal) path still triggers pairing for unknown users."""
|
||||
import gateway.run as gateway_run
|
||||
import gateway.pairing as pairing_mod
|
||||
|
||||
monkeypatch.setattr(gateway_run, "_hermes_home", tmp_path)
|
||||
# gateway.pairing.PAIRING_DIR is a module-level constant captured at
|
||||
# import time from whichever HERMES_HOME was set then. Per-test
|
||||
# HERMES_HOME redirection in conftest doesn't retroactively move it.
|
||||
# Override directly so pairing rate-limit state lives in this test's
|
||||
# tmp_path (and so stale state from prior xdist workers can't leak in).
|
||||
pairing_dir = tmp_path / "pairing"
|
||||
pairing_dir.mkdir()
|
||||
monkeypatch.setattr(pairing_mod, "PAIRING_DIR", pairing_dir)
|
||||
(tmp_path / "config.yaml").write_text("", encoding="utf-8")
|
||||
|
||||
# Clear env vars that could let all users through (loaded by
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
|||
import pytest
|
||||
|
||||
from gateway.config import Platform, StreamingConfig
|
||||
from gateway.platforms.base import resolve_proxy_url
|
||||
from gateway.run import GatewayRunner
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
|
@ -133,6 +134,15 @@ class TestGetProxyUrl:
|
|||
assert runner._get_proxy_url() is None
|
||||
|
||||
|
||||
class TestResolveProxyUrl:
    """resolve_proxy_url() reading proxy settings from the environment."""

    def test_normalizes_socks_alias_from_all_proxy(self, monkeypatch):
        """With only ALL_PROXY set to a ``socks://`` URL, the resolved URL
        uses the ``socks5://`` scheme."""
        upper_names = ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY")
        lower_names = tuple(name.lower() for name in upper_names)
        # Start with no proxy variable set, in either spelling.
        for proxy_var in upper_names + lower_names:
            monkeypatch.delenv(proxy_var, raising=False)

        monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")

        resolved = resolve_proxy_url()
        assert resolved == "socks5://127.0.0.1:1080/"
|
||||
|
||||
|
||||
class TestRunAgentProxyDispatch:
|
||||
"""Test that _run_agent() delegates to proxy when configured."""
|
||||
|
||||
|
|
|
|||
|
|
@ -184,8 +184,15 @@ async def test_start_gateway_replace_force_uses_terminate_pid(monkeypatch, tmp_p
|
|||
async def stop(self):
|
||||
return None
|
||||
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
|
||||
# get_running_pid returns 42 before we kill the old gateway, then None
|
||||
# after remove_pid_file() clears the record (reflects real behavior).
|
||||
_pid_state = {"alive": True}
|
||||
def _mock_get_running_pid():
|
||||
return 42 if _pid_state["alive"] else None
|
||||
def _mock_remove_pid_file():
|
||||
_pid_state["alive"] = False
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
|
||||
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
|
||||
monkeypatch.setattr("gateway.status.terminate_pid", lambda pid, force=False: calls.append((pid, force)))
|
||||
monkeypatch.setattr("gateway.run.os.getpid", lambda: 100)
|
||||
|
|
@ -253,8 +260,13 @@ async def test_start_gateway_replace_writes_takeover_marker_before_sigterm(
|
|||
async def stop(self):
|
||||
return None
|
||||
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 42)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", lambda: None)
|
||||
_pid_state = {"alive": True}
|
||||
def _mock_get_running_pid():
|
||||
return 42 if _pid_state["alive"] else None
|
||||
def _mock_remove_pid_file():
|
||||
_pid_state["alive"] = False
|
||||
monkeypatch.setattr("gateway.status.get_running_pid", _mock_get_running_pid)
|
||||
monkeypatch.setattr("gateway.status.remove_pid_file", _mock_remove_pid_file)
|
||||
monkeypatch.setattr("gateway.status.release_all_scoped_locks", lambda: 0)
|
||||
monkeypatch.setattr("gateway.status.write_takeover_marker", record_write_marker)
|
||||
monkeypatch.setattr("gateway.status.terminate_pid", record_terminate)
|
||||
|
|
|
|||
|
|
@ -356,6 +356,28 @@ class TestBuildSessionContextPrompt:
|
|||
assert "**User:** Alice" in prompt
|
||||
assert "Multi-user thread" not in prompt
|
||||
|
||||
def test_shared_non_thread_group_prompt_hides_single_user(self):
    """Shared (non per-user) group sessions must not pin the prompt to one user."""
    telegram_only = {
        Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
    }
    shared_config = GatewayConfig(
        platforms=telegram_only,
        group_sessions_per_user=False,
    )
    group_source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1002285219667",
        chat_name="Test Group",
        chat_type="group",
        user_name="Alice",
    )

    prompt = build_session_context_prompt(
        build_session_context(group_source, shared_config)
    )

    # The prompt should describe a multi-user session and the sender-prefix
    # convention rather than naming Alice as "the" user.
    assert "Multi-user session" in prompt
    assert "[sender name]" in prompt
    assert "**User:** Alice" not in prompt
|
||||
|
||||
def test_dm_thread_shows_user_not_multi(self):
|
||||
"""DM threads are single-user and should show User, not multi-user note."""
|
||||
config = GatewayConfig(
|
||||
|
|
|
|||
70
tests/gateway/test_shared_group_sender_prefix.py
Normal file
70
tests/gateway/test_shared_group_sender_prefix.py
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.platforms.base import MessageEvent
|
||||
from gateway.run import GatewayRunner
|
||||
from gateway.session import SessionSource
|
||||
|
||||
|
||||
def _make_runner(config: GatewayConfig) -> GatewayRunner:
    """Return a GatewayRunner created without calling ``__init__``.

    Only the attributes assigned here are set on the instance: the given
    *config*, an empty adapter mapping, a model name and no base URL.
    """
    attributes = {
        "config": config,
        "adapters": {},
        "_model": "openai/gpt-4.1-mini",
        "_base_url": None,
    }
    runner = object.__new__(GatewayRunner)
    for attr_name, attr_value in attributes.items():
        setattr(runner, attr_name, attr_value)
    return runner
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_preprocess_prefixes_sender_for_shared_non_thread_group_session():
    """With ``group_sessions_per_user=False`` group text gains a ``[sender]`` prefix."""
    shared_config = GatewayConfig(
        platforms={
            Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
        },
        group_sessions_per_user=False,
    )
    runner = _make_runner(shared_config)

    group_source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1002285219667",
        chat_name="Test Group",
        chat_type="group",
        user_name="Alice",
    )
    inbound = MessageEvent(text="hello", source=group_source)

    prepared = await runner._prepare_inbound_message_text(
        event=inbound,
        source=group_source,
        history=[],
    )

    assert prepared == "[Alice] hello"
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_preprocess_keeps_plain_text_for_default_group_sessions():
    """With the default group-session setting the message text is left unprefixed."""
    default_config = GatewayConfig(
        platforms={
            Platform.TELEGRAM: PlatformConfig(enabled=True, token="fake"),
        },
    )
    runner = _make_runner(default_config)

    group_source = SessionSource(
        platform=Platform.TELEGRAM,
        chat_id="-1002285219667",
        chat_name="Test Group",
        chat_type="group",
        user_name="Alice",
    )
    inbound = MessageEvent(text="hello", source=group_source)

    prepared = await runner._prepare_inbound_message_text(
        event=inbound,
        source=group_source,
        history=[],
    )

    assert prepared == "hello"
|
||||
|
|
@ -306,7 +306,13 @@ class TestSignalSessionSource:
|
|||
class TestSignalPhoneRedaction:
|
||||
@pytest.fixture(autouse=True)
def _ensure_redaction_enabled(self, monkeypatch):
    """Force redaction on for every test in this class.

    agent.redact snapshots _REDACT_ENABLED at import time from the
    HERMES_REDACT_SECRETS env var, so removing the variable alone is too
    late — the module was already imported during test collection with
    whatever value was in the env then. The flag is therefore patched
    directly as well. See skill: xdist-cross-test-pollution Pattern 5.
    """
    redact_env_var = "HERMES_REDACT_SECRETS"
    redact_flag = "agent.redact._REDACT_ENABLED"

    monkeypatch.delenv(redact_env_var, raising=False)
    monkeypatch.setattr(redact_flag, True)
|
||||
|
||||
def test_us_number(self):
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
|
|
|||
|
|
@ -19,6 +19,30 @@ class TestGatewayPidState:
|
|||
assert isinstance(payload["argv"], list)
|
||||
assert payload["argv"]
|
||||
|
||||
def test_write_pid_file_is_atomic_against_concurrent_writers(self, tmp_path, monkeypatch):
    """Regression: two concurrent --replace invocations must not both win.

    Without O_CREAT|O_EXCL, two processes racing through start_gateway()'s
    termination-wait would both write to gateway.pid, silently overwriting
    each other and leaving multiple gateway instances alive (#11718).
    """
    import pytest

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    pid_file = tmp_path / "gateway.pid"

    # The first writer creates the record.
    status.write_pid_file()
    assert pid_file.exists()

    # A second writer (a racing --replace that got past the earlier guards)
    # has to fail loudly instead of clobbering the existing record.
    with pytest.raises(FileExistsError):
        status.write_pid_file()

    # The record written by the first call is still the one on disk.
    record = json.loads(pid_file.read_text())
    assert record["pid"] == os.getpid()
|
||||
|
||||
def test_get_running_pid_rejects_live_non_gateway_pid(self, tmp_path, monkeypatch):
|
||||
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
|
||||
pid_path = tmp_path / "gateway.pid"
|
||||
|
|
|
|||
|
|
@ -71,7 +71,17 @@ def test_group_messages_can_require_direct_trigger_via_config():
|
|||
assert adapter._should_process_message(_group_message("hello everyone")) is False
|
||||
assert adapter._should_process_message(_group_message("hi @hermes_bot", entities=[_mention_entity("hi @hermes_bot")])) is True
|
||||
assert adapter._should_process_message(_group_message("replying", reply_to_bot=True)) is True
|
||||
assert adapter._should_process_message(_group_message("/status"), is_command=True) is True
|
||||
# Commands must also respect require_mention when it is enabled
|
||||
assert adapter._should_process_message(_group_message("/status"), is_command=True) is False
|
||||
# But commands with @mention still pass (Telegram emits a MENTION entity
|
||||
# for /cmd@botname — the bot menu and python-telegram-bot's CommandHandler
|
||||
# rely on this same mechanism)
|
||||
assert adapter._should_process_message(
|
||||
_group_message("/status@hermes_bot", entities=[_mention_entity("/status@hermes_bot")])
|
||||
) is True
|
||||
# And commands still pass unconditionally when require_mention is disabled
|
||||
adapter_no_mention = _make_adapter(require_mention=False)
|
||||
assert adapter_no_mention._should_process_message(_group_message("/status"), is_command=True) is True
|
||||
|
||||
|
||||
def test_free_response_chats_bypass_mention_requirement():
|
||||
|
|
|
|||
100
tests/gateway/test_telegram_webhook_secret.py
Normal file
100
tests/gateway/test_telegram_webhook_secret.py
Normal file
|
|
@ -0,0 +1,100 @@
|
|||
"""Tests for GHSA-3vpc-7q5r-276h — Telegram webhook secret required.
|
||||
|
||||
Previously, when TELEGRAM_WEBHOOK_URL was set but TELEGRAM_WEBHOOK_SECRET
|
||||
was not, python-telegram-bot received secret_token=None and the webhook
|
||||
endpoint accepted any HTTP POST.
|
||||
|
||||
The fix refuses to start the adapter in webhook mode without the secret.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
_repo = str(Path(__file__).resolve().parents[2])
|
||||
if _repo not in sys.path:
|
||||
sys.path.insert(0, _repo)
|
||||
|
||||
|
||||
class TestTelegramWebhookSecretRequired:
    """Direct source-level check of the webhook-secret guard.

    The guard is embedded in TelegramAdapter.connect() and hard to isolate
    via mocks (requires a full python-telegram-bot ApplicationBuilder
    chain). These tests exercise it via source inspection — verifying the
    check exists, raises RuntimeError with the advisory link, and only
    fires in webhook mode. End-to-end validation is covered by CI +
    manual deployment tests.
    """

    def _get_source(self) -> str:
        """Return the text of ``gateway/platforms/telegram.py`` under the repo root."""
        path = Path(_repo) / "gateway" / "platforms" / "telegram.py"
        return path.read_text(encoding="utf-8")

    def test_webhook_branch_checks_secret(self):
        """The webhook-mode branch of connect() must read
        TELEGRAM_WEBHOOK_SECRET and refuse when empty."""
        src = self._get_source()
        # The secret must be stripped and immediately followed by the
        # "if not webhook_secret:" guard.
        assert re.search(
            r'TELEGRAM_WEBHOOK_SECRET.*?\.strip\(\)\s*\n\s*if not webhook_secret:',
            src, re.DOTALL,
        ), (
            "TelegramAdapter.connect() must strip TELEGRAM_WEBHOOK_SECRET "
            "and raise when the secret is empty — see GHSA-3vpc-7q5r-276h"
        )

    def test_guard_raises_runtime_error(self):
        """The guard raises RuntimeError (not a silent log) so operators
        see the failure at startup."""
        src = self._get_source()
        # The statement directly under "if not webhook_secret:" must be a
        # RuntimeError being raised.
        guard_match = re.search(
            r'if not webhook_secret:\s*\n\s*raise\s+RuntimeError\(',
            src,
        )
        assert guard_match, (
            "Missing webhook secret must raise RuntimeError — silent "
            "fall-through was the original GHSA-3vpc-7q5r-276h bypass"
        )

    def test_guard_message_includes_advisory_link(self):
        """The RuntimeError message should reference the advisory so
        operators can read the full context."""
        src = self._get_source()
        assert "GHSA-3vpc-7q5r-276h" in src, (
            "Guard error message must cite the advisory for operator context"
        )

    def test_guard_message_explains_remediation(self):
        """The error should tell the operator how to fix it."""
        src = self._get_source()
        # Either a generation hint or an explicit assignment example counts.
        assert "openssl rand" in src or "TELEGRAM_WEBHOOK_SECRET=" in src, (
            "Guard error message should show operators how to set "
            "TELEGRAM_WEBHOOK_SECRET"
        )

    def test_polling_branch_has_no_secret_guard(self):
        """Polling mode (else-branch) must NOT require the webhook secret —
        polling authenticates via the bot token, not a webhook secret.

        If the ``if webhook_url: ... else:`` layout cannot be located in the
        source, the test is reported as skipped instead of passing without
        having checked anything.
        """
        src = self._get_source()
        # Rough check: locate the `if webhook_url:` branch and the `else:`
        # that starts the polling branch, then look for the secret in each.
        # NOTE(review): the second group is non-greedy and is terminated by
        # the first newline, so polling_body is only the first line of the
        # else-branch — confirm whether a wider capture is wanted.
        webhook_block = re.search(
            r'if webhook_url:\s*\n(.*?)\n else:\s*\n(.*?)\n',
            src, re.DOTALL,
        )
        if webhook_block is None:
            pytest.skip(
                "could not locate the `if webhook_url:` / `else:` branches in "
                "gateway/platforms/telegram.py; polling-branch check not run"
            )
        webhook_body = webhook_block.group(1)
        polling_body = webhook_block.group(2)
        assert "TELEGRAM_WEBHOOK_SECRET" in webhook_body
        assert "TELEGRAM_WEBHOOK_SECRET" not in polling_body
|
||||
|
|
@ -175,3 +175,79 @@ class TestUsageCachedAgent:
|
|||
result = await runner._handle_usage_command(event)
|
||||
|
||||
assert "Cost: included" in result
|
||||
|
||||
|
||||
class TestUsageAccountSection:
    """Account-limits section appended to /usage output (PR #2486)."""

    @pytest.mark.asyncio
    async def test_usage_command_includes_account_section(self, monkeypatch):
        """A cached agent's /usage output carries both token usage and account limits."""

        def _fake_fetch(provider, base_url=None, api_key=None):
            # Any opaque snapshot object will do; rendering is stubbed below.
            return object()

        def _fake_render(snapshot, markdown=False):
            return [
                "📈 **Account limits**",
                "Provider: openai-codex (Pro)",
                "Session: 85% remaining (15% used)",
            ]

        cached_agent = _make_mock_agent(provider="openai-codex")
        cached_agent.base_url = "https://chatgpt.com/backend-api/codex"
        cached_agent.api_key = "unused"
        runner = _make_runner(SK, cached_agent=cached_agent)
        event = MagicMock()

        monkeypatch.setattr("gateway.run.fetch_account_usage", _fake_fetch)
        monkeypatch.setattr("gateway.run.render_account_usage_lines", _fake_render)

        with patch("agent.rate_limit_tracker.format_rate_limit_compact", return_value="RPM: 50/60"):
            with patch("agent.usage_pricing.estimate_usage_cost") as mock_cost:
                mock_cost.return_value = MagicMock(amount_usd=None, status="included")
                result = await runner._handle_usage_command(event)

        assert "📊 **Session Token Usage**" in result
        assert "📈 **Account limits**" in result
        assert "Provider: openai-codex (Pro)" in result

    @pytest.mark.asyncio
    async def test_usage_command_uses_persisted_provider_when_agent_not_running(self, monkeypatch):
        """Without a cached agent, the provider/base URL come from the session DB row."""
        recorded = {}

        async def _fake_to_thread(fn, *args, **kwargs):
            # Capture what the handler hands to asyncio.to_thread, then run inline.
            recorded["args"] = args
            recorded["kwargs"] = kwargs
            return fn(*args, **kwargs)

        def _fake_fetch(provider, base_url=None, api_key=None):
            return object()

        def _fake_render(snapshot, markdown=False):
            return [
                "📈 **Account limits**",
                "Provider: openai-codex (Pro)",
            ]

        runner = _make_runner(SK)
        runner._session_db = MagicMock()
        runner._session_db.get_session.return_value = {
            "billing_provider": "openai-codex",
            "billing_base_url": "https://chatgpt.com/backend-api/codex",
        }
        session_entry = MagicMock()
        session_entry.session_id = "sess-1"
        runner.session_store.get_or_create_session.return_value = session_entry
        runner.session_store.load_transcript.return_value = [
            {"role": "user", "content": "earlier"},
        ]

        monkeypatch.setattr("gateway.run.asyncio.to_thread", _fake_to_thread)
        monkeypatch.setattr("gateway.run.fetch_account_usage", _fake_fetch)
        monkeypatch.setattr("gateway.run.render_account_usage_lines", _fake_render)

        result = await runner._handle_usage_command(MagicMock())

        assert recorded["args"] == ("openai-codex",)
        assert recorded["kwargs"]["base_url"] == "https://chatgpt.com/backend-api/codex"
        assert "📊 **Session Info**" in result
        assert "📈 **Account limits**" in result
|
||||
|
|
|
|||
|
|
@ -921,17 +921,13 @@ class TestKimiMoonshotModelListIsolation:
|
|||
leaked = set(moonshot_models) & coding_plan_only
|
||||
assert not leaked, f"Moonshot list contains Coding Plan-only models: {leaked}"
|
||||
|
||||
def test_moonshot_list_contains_shared_models(self):
|
||||
def test_moonshot_list_non_empty(self):
|
||||
from hermes_cli.main import _PROVIDER_MODELS
|
||||
moonshot_models = _PROVIDER_MODELS["moonshot"]
|
||||
assert "kimi-k2.5" in moonshot_models
|
||||
assert "kimi-k2-thinking" in moonshot_models
|
||||
assert len(_PROVIDER_MODELS["moonshot"]) >= 1
|
||||
|
||||
def test_coding_plan_list_contains_plan_specific_models(self):
|
||||
def test_coding_plan_list_non_empty(self):
|
||||
from hermes_cli.main import _PROVIDER_MODELS
|
||||
coding_models = _PROVIDER_MODELS["kimi-coding"]
|
||||
assert "kimi-for-coding" in coding_models
|
||||
assert "kimi-k2-thinking-turbo" in coding_models
|
||||
assert len(_PROVIDER_MODELS["kimi-coding"]) >= 1
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -944,14 +940,12 @@ class TestHuggingFaceModels:
|
|||
def test_main_provider_models_has_huggingface(self):
|
||||
from hermes_cli.main import _PROVIDER_MODELS
|
||||
assert "huggingface" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["huggingface"]
|
||||
assert len(models) >= 6, "Expected at least 6 curated HF models"
|
||||
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
|
||||
|
||||
def test_models_py_has_huggingface(self):
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
assert "huggingface" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["huggingface"]
|
||||
assert len(models) >= 6
|
||||
assert len(_PROVIDER_MODELS["huggingface"]) >= 1
|
||||
|
||||
def test_model_lists_match(self):
|
||||
"""Model lists in main.py and models.py should be identical."""
|
||||
|
|
|
|||
|
|
@ -115,12 +115,12 @@ class TestArceeCredentials:
|
|||
|
||||
class TestArceeModelCatalog:
|
||||
def test_static_model_list(self):
|
||||
"""Arcee has a static _PROVIDER_MODELS catalog entry. Specific model
|
||||
names change with releases and don't belong in tests.
|
||||
"""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
assert "arcee" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["arcee"]
|
||||
assert "trinity-large-thinking" in models
|
||||
assert "trinity-large-preview" in models
|
||||
assert "trinity-mini" in models
|
||||
assert len(_PROVIDER_MODELS["arcee"]) >= 1
|
||||
|
||||
def test_canonical_provider_entry(self):
|
||||
from hermes_cli.models import CANONICAL_PROVIDERS
|
||||
|
|
|
|||
|
|
@ -1011,3 +1011,466 @@ def test_seed_from_singletons_respects_codex_suppression(tmp_path, monkeypatch):
|
|||
# Verify the auth store was NOT modified (no auto-import happened)
|
||||
after = json.loads((hermes_home / "auth.json").read_text())
|
||||
assert "openai-codex" not in after.get("providers", {})
|
||||
|
||||
|
||||
def test_auth_remove_env_seeded_suppresses_shell_exported_var(tmp_path, monkeypatch, capsys):
    """`hermes auth remove xai 1` must stick even when the env var is exported
    by the shell (not written into ~/.hermes/.env). Before PR for #13371 the
    removal silently restored on next load_pool() because _seed_from_env()
    re-read os.environ. Now env:<VAR> is suppressed in auth.json.
    """
    from types import SimpleNamespace

    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    # The key exists only as a shell export; the .env file stays empty.
    shell_key = "sk-xai-shell-export"
    monkeypatch.setenv("XAI_API_KEY", shell_key)
    (home / ".env").write_text("")

    env_seeded_entry = {
        "id": "env-1",
        "label": "XAI_API_KEY",
        "auth_type": "api_key",
        "priority": 0,
        "source": "env:XAI_API_KEY",
        "access_token": shell_key,
        "base_url": "https://api.x.ai/v1",
    }
    _write_auth_store(
        tmp_path,
        {"version": 1, "credential_pool": {"xai": [env_seeded_entry]}},
    )

    from hermes_cli.auth_commands import auth_remove_command
    auth_remove_command(SimpleNamespace(provider="xai", target="1"))

    # The suppression marker has been written to auth.json.
    store = json.loads((home / "auth.json").read_text())
    suppressed = store.get("suppressed_sources", {}).get("xai", [])
    assert "env:XAI_API_KEY" in suppressed

    # The user is pointed at the shell; nothing was cleared from .env.
    printed = capsys.readouterr().out
    assert "still set in your shell environment" in printed
    assert "Cleared XAI_API_KEY from .env" not in printed  # wasn't in .env

    # Simulate a fresh start where the shell exports the key again.
    monkeypatch.setenv("XAI_API_KEY", shell_key)
    from agent.credential_pool import load_pool
    pool = load_pool("xai")
    assert not pool.has_credentials(), "pool must stay empty — env:XAI_API_KEY suppressed"
|
||||
|
||||
|
||||
def test_auth_remove_env_seeded_dotenv_only_no_shell_hint(tmp_path, monkeypatch, capsys):
    """When the env var lives only in ~/.hermes/.env (not the shell), the
    shell-hint should NOT be printed — avoid scaring the user about a
    non-existent shell export.
    """
    from types import SimpleNamespace

    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    # The key is written to .env only; the shell must not carry it.
    dotenv_path = home / ".env"
    monkeypatch.delenv("DEEPSEEK_API_KEY", raising=False)
    dotenv_path.write_text("DEEPSEEK_API_KEY=sk-ds-only\n")
    # Mimic load_env() populating os.environ
    monkeypatch.setenv("DEEPSEEK_API_KEY", "sk-ds-only")

    env_seeded_entry = {
        "id": "env-1",
        "label": "DEEPSEEK_API_KEY",
        "auth_type": "api_key",
        "priority": 0,
        "source": "env:DEEPSEEK_API_KEY",
        "access_token": "sk-ds-only",
    }
    _write_auth_store(
        tmp_path,
        {"version": 1, "credential_pool": {"deepseek": [env_seeded_entry]}},
    )

    from hermes_cli.auth_commands import auth_remove_command
    auth_remove_command(SimpleNamespace(provider="deepseek", target="1"))

    printed = capsys.readouterr().out
    assert "Cleared DEEPSEEK_API_KEY from .env" in printed
    assert "still set in your shell environment" not in printed
    assert dotenv_path.read_text().strip() == ""
|
||||
|
||||
|
||||
def test_auth_add_clears_env_suppression_for_provider(tmp_path, monkeypatch):
    """Re-adding a credential via `hermes auth add <provider>` clears any
    env:<VAR> suppression marker — strong signal the user wants auth back.
    Matches the Codex device_code re-link behaviour.
    """
    from types import SimpleNamespace

    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.delenv("XAI_API_KEY", raising=False)

    # Start with the xai env source already suppressed.
    initial_store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
    }
    _write_auth_store(tmp_path, initial_store)

    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_add_command

    marker = ("xai", "env:XAI_API_KEY")
    assert is_source_suppressed(*marker) is True

    add_args = SimpleNamespace(
        provider="xai", auth_type="api_key",
        api_key="sk-xai-manual", label="manual",
    )
    auth_add_command(add_args)

    assert is_source_suppressed(*marker) is False
|
||||
|
||||
|
||||
def test_seed_from_env_respects_env_suppression(tmp_path, monkeypatch):
    """_seed_from_env() must skip env:<VAR> sources that the user suppressed
    via `hermes auth remove`. This is the gate that prevents shell-exported
    keys from resurrecting removed credentials.
    """
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setenv("XAI_API_KEY", "sk-xai-shell-export")

    auth_store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"xai": ["env:XAI_API_KEY"]},
    }
    (home / "auth.json").write_text(json.dumps(auth_store))

    from agent.credential_pool import _seed_from_env

    seeded = []
    changed, active = _seed_from_env("xai", seeded)

    # Nothing was added even though the variable is set in the environment.
    assert changed is False
    assert seeded == []
    assert active == set()
|
||||
|
||||
|
||||
def test_seed_from_env_respects_openrouter_suppression(tmp_path, monkeypatch):
    """OpenRouter is the special-case branch in _seed_from_env; verify it
    honours suppression too.
    """
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-shell-export")

    auth_store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"openrouter": ["env:OPENROUTER_API_KEY"]},
    }
    (home / "auth.json").write_text(json.dumps(auth_store))

    from agent.credential_pool import _seed_from_env

    seeded = []
    changed, active = _seed_from_env("openrouter", seeded)

    assert changed is False
    assert seeded == []
    assert active == set()
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Unified credential-source stickiness — every source Hermes reads from has a
|
||||
# registered RemovalStep in agent.credential_sources, and every seeding path
|
||||
# gates on is_source_suppressed. Below: one test per source proving remove
|
||||
# sticks across a fresh load_pool() call.
|
||||
# =============================================================================
|
||||
|
||||
|
||||
def test_seed_from_singletons_respects_nous_suppression(tmp_path, monkeypatch):
    """nous device_code must not re-seed from auth.json when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    # auth.json holds a nous provider record AND the suppression marker for it.
    nous_record = {"access_token": "tok", "refresh_token": "r", "expires_at": 9999999999}
    auth_store = {
        "version": 1,
        "providers": {"nous": nous_record},
        "suppressed_sources": {"nous": ["device_code"]},
    }
    (home / "auth.json").write_text(json.dumps(auth_store))

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    changed, active = _seed_from_singletons("nous", seeded)

    assert changed is False
    assert seeded == []
    assert active == set()
|
||||
|
||||
|
||||
def test_seed_from_singletons_respects_copilot_suppression(tmp_path, monkeypatch):
    """copilot gh_cli must not re-seed when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    auth_store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"copilot": ["gh_cli"]},
    }
    (home / "auth.json").write_text(json.dumps(auth_store))

    # Make token resolution report a live token so only suppression can
    # keep the pool empty.
    import hermes_cli.copilot_auth as ca

    def _live_token():
        return ("ghp_fake", "gh auth token")

    monkeypatch.setattr(ca, "resolve_copilot_token", _live_token)

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    changed, active = _seed_from_singletons("copilot", seeded)

    assert changed is False
    assert seeded == []
    assert active == set()
|
||||
|
||||
|
||||
def test_seed_from_singletons_respects_qwen_suppression(tmp_path, monkeypatch):
    """qwen-oauth qwen-cli must not re-seed from ~/.qwen/oauth_creds.json when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    auth_store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"qwen-oauth": ["qwen-cli"]},
    }
    (home / "auth.json").write_text(json.dumps(auth_store))

    # Stub the runtime-credential resolver so it always reports a qwen-cli token.
    import hermes_cli.auth as ha

    def _fake_qwen_credentials(**kw):
        return {
            "api_key": "tok", "source": "qwen-cli", "base_url": "https://q",
        }

    monkeypatch.setattr(ha, "resolve_qwen_runtime_credentials", _fake_qwen_credentials)

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    changed, active = _seed_from_singletons("qwen-oauth", seeded)

    assert changed is False
    assert seeded == []
    assert active == set()
|
||||
|
||||
|
||||
def test_seed_from_singletons_respects_hermes_pkce_suppression(tmp_path, monkeypatch):
    """anthropic hermes_pkce must not re-seed from ~/.hermes/.anthropic_oauth.json when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    import yaml

    model_config = {"model": {"provider": "anthropic", "model": "claude"}}
    (home / "config.yaml").write_text(yaml.dump(model_config))

    auth_store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"anthropic": ["hermes_pkce"]},
    }
    (home / "auth.json").write_text(json.dumps(auth_store))

    # Only the hermes_pkce reader yields credentials; the claude_code reader
    # is stubbed to return None.
    import agent.anthropic_adapter as aa

    def _hermes_oauth():
        return {
            "accessToken": "tok", "refreshToken": "r", "expiresAt": 9999999999000,
        }

    def _no_claude_code():
        return None

    monkeypatch.setattr(aa, "read_hermes_oauth_credentials", _hermes_oauth)
    monkeypatch.setattr(aa, "read_claude_code_credentials", _no_claude_code)

    from agent.credential_pool import _seed_from_singletons

    seeded = []
    _changed, active = _seed_from_singletons("anthropic", seeded)

    # hermes_pkce suppressed, claude_code returns None → nothing should be seeded
    assert seeded == []
    assert "hermes_pkce" not in active
|
||||
|
||||
|
||||
def test_seed_custom_pool_respects_config_suppression(tmp_path, monkeypatch):
    """Custom provider config:<name> source must not re-seed when suppressed."""
    home = tmp_path / "hermes"
    home.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home))

    import yaml

    custom_url = "https://c.example.com"
    config = {
        "model": {},
        "custom_providers": [
            {"name": "my", "base_url": custom_url, "api_key": "sk-custom"},
        ],
    }
    (home / "config.yaml").write_text(yaml.dump(config))

    from agent.credential_pool import _seed_custom_pool, get_custom_provider_pool_key

    # Suppression is stored under the pool key derived from the base URL.
    pool_key = get_custom_provider_pool_key(custom_url)
    auth_store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {pool_key: ["config:my"]},
    }
    (home / "auth.json").write_text(json.dumps(auth_store))

    seeded = []
    changed, active = _seed_custom_pool(pool_key, seeded)

    assert changed is False
    assert seeded == []
    assert "config:my" not in active
|
||||
|
||||
|
||||
def test_credential_sources_registry_has_expected_steps():
    """Sanity check — the registry still contains the known RemovalSteps.

    Guards against accidentally dropping a step during future refactors.
    The check is a subset test rather than an exact snapshot, so registering
    a new credential source does not break it; adding the new description to
    the set below is optional but extends the guard to that source.
    """
    from agent.credential_sources import _REGISTRY

    registered = {step.description for step in _REGISTRY}
    required = {
        "gh auth token / COPILOT_GITHUB_TOKEN / GH_TOKEN",
        "Any env-seeded credential (XAI_API_KEY, DEEPSEEK_API_KEY, etc.)",
        "~/.claude/.credentials.json",
        "~/.hermes/.anthropic_oauth.json",
        "auth.json providers.nous",
        "auth.json providers.openai-codex + ~/.codex/auth.json",
        "~/.qwen/oauth_creds.json",
        "Custom provider config.yaml api_key field",
    }

    # Only a *missing* step is a regression; extra steps are routine growth.
    missing = required - registered
    assert not missing, (
        f"Registry is missing expected steps: {sorted(missing)}. Got: {registered}"
    )
|
||||
|
||||
|
||||
def test_credential_sources_find_step_returns_none_for_manual():
    """Manual entries have nothing external to clean up — no step registered."""
    from agent.credential_sources import find_removal_step

    openrouter_step = find_removal_step("openrouter", "manual")
    assert openrouter_step is None

    xai_step = find_removal_step("xai", "manual")
    assert xai_step is None
|
||||
|
||||
|
||||
def test_credential_sources_find_step_copilot_before_generic_env(tmp_path, monkeypatch):
    """copilot env:GH_TOKEN must dispatch to the copilot step, not the
    generic env-var step. The copilot step handles the duplicate-source
    problem (same token seeded as both gh_cli and env:<VAR>); the generic
    env step would only suppress one of the variants.
    """
    from agent.credential_sources import find_removal_step

    copilot_step = find_removal_step("copilot", "env:GH_TOKEN")
    assert copilot_step is not None
    copilot_text = copilot_step.description.lower()
    assert "copilot" in copilot_text or "gh" in copilot_text

    # Any other provider's env var still resolves to the generic step.
    generic_step = find_removal_step("xai", "env:XAI_API_KEY")
    assert generic_step is not None
    assert "env-seeded" in generic_step.description.lower()
|
||||
|
||||
|
||||
def test_auth_remove_copilot_suppresses_all_variants(tmp_path, monkeypatch):
    """Removing one copilot credential suppresses every copilot seed source.

    After ``auth_remove_command`` runs against the single ``gh_cli`` pool
    entry, ``gh_cli`` and each ``env:*`` variant must all report as
    suppressed so the duplicate-seed paths cannot resurrect the credential.
    """
    home_dir = tmp_path / "hermes"
    home_dir.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    gh_cli_entry = {
        "id": "c1",
        "label": "gh auth token",
        "auth_type": "api_key",
        "priority": 0,
        "source": "gh_cli",
        "access_token": "ghp_fake",
    }
    auth_store = {
        "version": 1,
        "credential_pool": {"copilot": [gh_cli_entry]},
    }
    _write_auth_store(tmp_path, auth_store)

    # Imported only after HERMES_HOME and the store are in place.
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_remove_command

    auth_remove_command(SimpleNamespace(provider="copilot", target="1"))

    expected_suppressed = (
        "gh_cli",
        "env:COPILOT_GITHUB_TOKEN",
        "env:GH_TOKEN",
        "env:GITHUB_TOKEN",
    )
    for source in expected_suppressed:
        assert is_source_suppressed("copilot", source)
|
||||
|
||||
|
||||
def test_auth_add_clears_all_suppressions_including_non_env(tmp_path, monkeypatch):
    """``hermes auth add <provider>`` lifts every suppression marker.

    The store starts with ``gh_cli`` and two ``env:*`` markers suppressed for
    copilot.  Re-adding a credential is a single "re-engage" action, so none
    of them — not just the ``env:*`` ones — may remain suppressed afterwards.
    """
    home_dir = tmp_path / "hermes"
    home_dir.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    previously_suppressed = ["gh_cli", "env:GH_TOKEN", "env:COPILOT_GITHUB_TOKEN"]
    auth_store = {
        "version": 1,
        "providers": {},
        "suppressed_sources": {"copilot": previously_suppressed},
    }
    _write_auth_store(tmp_path, auth_store)

    # Imported only after HERMES_HOME and the store are in place.
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_add_command

    add_args = SimpleNamespace(
        provider="copilot",
        auth_type="api_key",
        api_key="ghp-manual",
        label="m",
    )
    auth_add_command(add_args)

    for source in ("gh_cli", "env:GH_TOKEN", "env:COPILOT_GITHUB_TOKEN"):
        assert not is_source_suppressed("copilot", source)
|
||||
|
||||
|
||||
def test_auth_remove_codex_manual_device_code_suppresses_canonical(tmp_path, monkeypatch):
    """Removing a ``manual:device_code`` codex entry suppresses ``device_code``.

    Entries created by ``hermes auth add openai-codex`` carry the source
    ``manual:device_code``, but the re-seed gate in ``_seed_from_singletons``
    checks the canonical ``device_code`` key — so that is the key that must
    end up suppressed.
    """
    home_dir = tmp_path / "hermes"
    home_dir.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    codex_entry = {
        "id": "cdx",
        "label": "manual-codex",
        "auth_type": "oauth",
        "priority": 0,
        "source": "manual:device_code",
        "access_token": "t",
    }
    auth_store = {
        "version": 1,
        "providers": {"openai-codex": {"tokens": {"access_token": "t", "refresh_token": "r"}}},
        "credential_pool": {"openai-codex": [codex_entry]},
    }
    _write_auth_store(tmp_path, auth_store)

    # Imported only after HERMES_HOME and the store are in place.
    from types import SimpleNamespace
    from hermes_cli.auth import is_source_suppressed
    from hermes_cli.auth_commands import auth_remove_command

    remove_args = SimpleNamespace(provider="openai-codex", target="1")
    auth_remove_command(remove_args)
    assert is_source_suppressed("openai-codex", "device_code")
|
||||
|
|
|
|||
|
|
@ -459,7 +459,8 @@ class TestCustomProviderCompatibility:
|
|||
migrate_config(interactive=False, quiet=True)
|
||||
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
|
||||
assert raw["_config_version"] == 21
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
|
||||
assert raw["providers"]["openai-direct"] == {
|
||||
"api": "https://api.openai.com/v1",
|
||||
"api_key": "test-key",
|
||||
|
|
@ -501,7 +502,8 @@ class TestCustomProviderCompatibility:
|
|||
assert compatible[0]["provider_key"] == "openai-direct"
|
||||
assert compatible[0]["api_mode"] == "codex_responses"
|
||||
|
||||
def test_compatible_custom_providers_prefers_api_then_url_then_base_url(self, tmp_path):
|
||||
def test_compatible_custom_providers_prefers_base_url_then_url_then_api(self, tmp_path):
|
||||
"""URL field precedence is base_url > url > api (PR #9332)."""
|
||||
config_path = tmp_path / "config.yaml"
|
||||
config_path.write_text(
|
||||
yaml.safe_dump(
|
||||
|
|
@ -526,7 +528,7 @@ class TestCustomProviderCompatibility:
|
|||
assert compatible == [
|
||||
{
|
||||
"name": "My Provider",
|
||||
"base_url": "https://api.example.com/v1",
|
||||
"base_url": "https://base.example.com/v1",
|
||||
"provider_key": "my-provider",
|
||||
}
|
||||
]
|
||||
|
|
@ -606,7 +608,8 @@ class TestInterimAssistantMessageConfig:
|
|||
migrate_config(interactive=False, quiet=True)
|
||||
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
|
||||
assert raw["_config_version"] == 21
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
|
||||
assert raw["display"]["tool_progress"] == "off"
|
||||
assert raw["display"]["interim_assistant_messages"] is True
|
||||
|
||||
|
|
@ -626,7 +629,8 @@ class TestDiscordChannelPromptsConfig:
|
|||
migrate_config(interactive=False, quiet=True)
|
||||
raw = yaml.safe_load(config_path.read_text(encoding="utf-8"))
|
||||
|
||||
assert raw["_config_version"] == 21
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
assert raw["_config_version"] == DEFAULT_CONFIG["_config_version"]
|
||||
assert raw["discord"]["auto_thread"] is True
|
||||
assert raw["discord"]["channel_prompts"] == {}
|
||||
|
||||
|
|
|
|||
|
|
@ -125,18 +125,12 @@ class TestGeminiCredentials:
|
|||
# ── Model Catalog ──
|
||||
|
||||
class TestGeminiModelCatalog:
|
||||
def test_provider_models_exist(self):
|
||||
def test_provider_entry_exists(self):
|
||||
"""Gemini provider has a model catalog entry. Specific model names
|
||||
are data that changes with Google releases and don't belong in tests.
|
||||
"""
|
||||
assert "gemini" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["gemini"]
|
||||
assert "gemini-2.5-pro" in models
|
||||
assert "gemini-2.5-flash" in models
|
||||
assert "gemma-4-31b-it" not in models
|
||||
|
||||
def test_provider_models_has_3x(self):
|
||||
models = _PROVIDER_MODELS["gemini"]
|
||||
assert "gemini-3.1-pro-preview" in models
|
||||
assert "gemini-3-flash-preview" in models
|
||||
assert "gemini-3.1-flash-lite-preview" in models
|
||||
assert len(_PROVIDER_MODELS["gemini"]) >= 1
|
||||
|
||||
def test_provider_label(self):
|
||||
assert "gemini" in _PROVIDER_LABELS
|
||||
|
|
|
|||
|
|
@ -457,29 +457,62 @@ class TestValidateApiNotFound:
|
|||
assert "not found" in result["message"]
|
||||
|
||||
|
||||
# -- validate — API unreachable — reject with guidance ----------------
|
||||
# -- validate — API unreachable — soft-accept via catalog or warning --------
|
||||
|
||||
class TestValidateApiFallback:
|
||||
def test_any_model_rejected_when_api_down(self):
|
||||
result = _validate("anthropic/claude-opus-4.6", api_models=None)
|
||||
assert result["accepted"] is False
|
||||
assert result["persist"] is False
|
||||
"""When /models is unreachable, the validator must accept the model (with
|
||||
a warning) rather than reject it outright — otherwise provider switches
|
||||
fail in the gateway for any provider whose /models endpoint is down or
|
||||
doesn't exist (e.g. opencode-go returns 404 HTML).
|
||||
|
||||
def test_unknown_model_also_rejected_when_api_down(self):
|
||||
result = _validate("anthropic/claude-next-gen", api_models=None)
|
||||
assert result["accepted"] is False
|
||||
assert result["persist"] is False
|
||||
assert "could not reach" in result["message"].lower()
|
||||
Two paths:
|
||||
1. Provider has a curated catalog (``_PROVIDER_MODELS`` / live fetch):
|
||||
validate against it (recognized=True for known models,
|
||||
recognized=False with 'Note:' for unknown).
|
||||
2. Provider has no catalog: accept with a generic 'Note:' warning.
|
||||
|
||||
def test_zai_model_rejected_when_api_down(self):
|
||||
In both cases ``accepted`` and ``persist`` must be True so the gateway can
|
||||
write the ``_session_model_overrides`` entry.
|
||||
"""
|
||||
|
||||
def test_known_model_accepted_via_catalog_when_api_down(self):
|
||||
# Force the openrouter catalog lookup to return a deterministic list.
|
||||
with patch(
|
||||
"hermes_cli.models.provider_model_ids",
|
||||
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
|
||||
):
|
||||
result = _validate("anthropic/claude-opus-4.6", api_models=None)
|
||||
assert result["accepted"] is True
|
||||
assert result["persist"] is True
|
||||
assert result["recognized"] is True
|
||||
|
||||
def test_unknown_model_accepted_with_note_when_api_down(self):
|
||||
with patch(
|
||||
"hermes_cli.models.provider_model_ids",
|
||||
return_value=["anthropic/claude-opus-4.6", "openai/gpt-5.4"],
|
||||
):
|
||||
result = _validate("anthropic/claude-next-gen", api_models=None)
|
||||
assert result["accepted"] is True
|
||||
assert result["persist"] is True
|
||||
assert result["recognized"] is False
|
||||
# Message flags it as unverified against the catalog.
|
||||
assert "not found" in result["message"].lower() or "note" in result["message"].lower()
|
||||
|
||||
def test_zai_known_model_accepted_via_catalog_when_api_down(self):
|
||||
# glm-5 is in the zai curated catalog (_PROVIDER_MODELS["zai"]).
|
||||
result = _validate("glm-5", provider="zai", api_models=None)
|
||||
assert result["accepted"] is False
|
||||
assert result["persist"] is False
|
||||
assert result["accepted"] is True
|
||||
assert result["persist"] is True
|
||||
assert result["recognized"] is True
|
||||
|
||||
def test_unknown_provider_rejected_when_api_down(self):
|
||||
result = _validate("some-model", provider="totally-unknown", api_models=None)
|
||||
assert result["accepted"] is False
|
||||
assert result["persist"] is False
|
||||
def test_unknown_provider_soft_accepted_when_api_down(self):
|
||||
# No catalog for unknown providers — soft-accept with a Note.
|
||||
with patch("hermes_cli.models.provider_model_ids", return_value=[]):
|
||||
result = _validate("some-model", provider="totally-unknown", api_models=None)
|
||||
assert result["accepted"] is True
|
||||
assert result["persist"] is True
|
||||
assert result["recognized"] is False
|
||||
assert "note" in result["message"].lower()
|
||||
|
||||
def test_custom_endpoint_warns_with_probed_url_and_v1_hint(self):
|
||||
with patch(
|
||||
|
|
|
|||
|
|
@ -88,6 +88,131 @@ class TestFetchOpenRouterModels:
|
|||
|
||||
assert models == OPENROUTER_MODELS
|
||||
|
||||
def test_filters_out_models_without_tool_support(self, monkeypatch):
    """A model whose supported_parameters lacks 'tools' is kept out of the picker.

    hermes-agent is tool-calling-first, so offering a model that cannot call
    tools fails as soon as the user selects it.  Ported from
    Kilo-Org/kilocode#9068.
    """
    # Fake catalog response with three entries:
    #   anthropic/claude-opus-4.6          advertises tools            -> kept
    #   google/gemini-3-pro-image-preview  lists params WITHOUT tools  -> dropped
    #   qwen/qwen3.6-plus                  advertises tools            -> kept
    catalog_payload = (
        b'{"data":['
        b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"},'
        b'"supported_parameters":["temperature","tools","tool_choice"]},'
        b'{"id":"google/gemini-3-pro-image-preview","pricing":{"prompt":"0.00001","completion":"0.00003"},'
        b'"supported_parameters":["temperature","response_format"]},'
        b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"},'
        b'"supported_parameters":["tools","temperature"]}'
        b']}'
    )

    class _FakeResponse:
        """Context-manager stand-in for the object returned by urlopen()."""

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def read(self):
            return catalog_payload

    # The image-only id is part of the curated list, so only the tool filter
    # can be responsible for it going missing.
    curated = [
        ("anthropic/claude-opus-4.6", ""),
        ("google/gemini-3-pro-image-preview", ""),
        ("qwen/qwen3.6-plus", ""),
    ]
    monkeypatch.setattr(_models_mod, "OPENROUTER_MODELS", curated)
    monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)

    with patch("hermes_cli.models.urllib.request.urlopen", return_value=_FakeResponse()):
        fetched = fetch_openrouter_models(force_refresh=True)

    ids = [model_id for model_id, _ in fetched]
    for kept in ("anthropic/claude-opus-4.6", "qwen/qwen3.6-plus"):
        assert kept in ids
    # supported_parameters was present but had no 'tools' entry → dropped.
    assert "google/gemini-3-pro-image-preview" not in ids
|
||||
|
||||
def test_permissive_when_supported_parameters_missing(self, monkeypatch):
    """Entries with no supported_parameters field still show up in the picker.

    Some OpenRouter-compatible gateways (Nous Portal, private mirrors, older
    catalog snapshots) never populate supported_parameters.  A missing field
    is treated as 'unknown → allow' so the picker does not silently empty
    out on those gateways.
    """
    # Neither entry carries a supported_parameters field.
    catalog_payload = (
        b'{"data":['
        b'{"id":"anthropic/claude-opus-4.6","pricing":{"prompt":"0.000015","completion":"0.000075"}},'
        b'{"id":"qwen/qwen3.6-plus","pricing":{"prompt":"0.000000325","completion":"0.00000195"}}'
        b']}'
    )

    class _FakeResponse:
        """Context-manager stand-in for the object returned by urlopen()."""

        def __enter__(self):
            return self

        def __exit__(self, exc_type, exc, tb):
            return False

        def read(self):
            return catalog_payload

    monkeypatch.setattr(_models_mod, "_openrouter_catalog_cache", None)
    with patch("hermes_cli.models.urllib.request.urlopen", return_value=_FakeResponse()):
        fetched = fetch_openrouter_models(force_refresh=True)

    ids = [model_id for model_id, _ in fetched]
    for kept in ("anthropic/claude-opus-4.6", "qwen/qwen3.6-plus"):
        assert kept in ids
|
||||
|
||||
|
||||
class TestOpenRouterToolSupportHelper:
    """Unit tests for _openrouter_model_supports_tools (Kilo port #9068).

    The helper is imported inside each test, so an import failure is reported
    per test rather than breaking collection of the whole module.
    """

    def test_tools_in_supported_parameters(self):
        """'tools' listed in supported_parameters → True."""
        from hermes_cli.models import _openrouter_model_supports_tools

        item = {"id": "x", "supported_parameters": ["temperature", "tools"]}
        assert _openrouter_model_supports_tools(item) is True

    def test_tools_missing_from_supported_parameters(self):
        """A populated list without 'tools' → False."""
        from hermes_cli.models import _openrouter_model_supports_tools

        item = {"id": "x", "supported_parameters": ["temperature", "response_format"]}
        assert _openrouter_model_supports_tools(item) is False

    def test_supported_parameters_absent_is_permissive(self):
        """Missing field → allow (so older / non-OR gateways still work)."""
        from hermes_cli.models import _openrouter_model_supports_tools

        item = {"id": "x"}
        assert _openrouter_model_supports_tools(item) is True

    def test_supported_parameters_none_is_permissive(self):
        """An explicit ``None`` value → allow."""
        from hermes_cli.models import _openrouter_model_supports_tools

        item = {"id": "x", "supported_parameters": None}
        assert _openrouter_model_supports_tools(item) is True

    def test_supported_parameters_malformed_is_permissive(self):
        """Malformed (non-list) value → allow rather than silently drop."""
        from hermes_cli.models import _openrouter_model_supports_tools

        item = {"id": "x", "supported_parameters": "tools,temperature"}
        assert _openrouter_model_supports_tools(item) is True

    def test_non_dict_item_is_permissive(self):
        """Items that are not dicts (``None``, a bare id string) → allow."""
        from hermes_cli.models import _openrouter_model_supports_tools

        for item in (None, "anthropic/claude-opus-4.6"):
            assert _openrouter_model_supports_tools(item) is True

    def test_empty_supported_parameters_list_drops_model(self):
        """Explicit empty list → no tools → drop."""
        from hermes_cli.models import _openrouter_model_supports_tools

        item = {"id": "x", "supported_parameters": []}
        assert _openrouter_model_supports_tools(item) is False
|
||||
|
||||
|
||||
class TestFindOpenrouterSlug:
|
||||
def test_exact_match(self):
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ def test_opencode_go_appears_when_api_key_set():
|
|||
opencode_go = next((p for p in providers if p["slug"] == "opencode-go"), None)
|
||||
|
||||
assert opencode_go is not None, "opencode-go should appear when OPENCODE_GO_API_KEY is set"
|
||||
assert opencode_go["models"] == ["kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
|
||||
assert opencode_go["models"] == ["kimi-k2.6", "kimi-k2.5", "glm-5.1", "glm-5", "mimo-v2-pro", "mimo-v2-omni", "minimax-m2.7", "minimax-m2.5"]
|
||||
# opencode-go can appear as "built-in" (from PROVIDER_TO_MODELS_DEV when
|
||||
# models.dev is reachable) or "hermes" (from HERMES_OVERLAYS fallback when
|
||||
# the API is unavailable, e.g. in CI).
|
||||
|
|
|
|||
133
tests/hermes_cli/test_opencode_go_validation_fallback.py
Normal file
133
tests/hermes_cli/test_opencode_go_validation_fallback.py
Normal file
|
|
@ -0,0 +1,133 @@
|
|||
"""Tests for the static-catalog fallback in validate_requested_model.
|
||||
|
||||
OpenCode Go and OpenCode Zen publish an OpenAI-compatible API at paths that do
|
||||
NOT expose ``/models`` (the path returns the marketing site's HTML 404). This
|
||||
caused ``validate_requested_model`` to return ``accepted=False`` for every
|
||||
model on those providers, which in turn made ``switch_model()`` fail and the
|
||||
gateway's ``/model <name> --provider opencode-go`` command never write to
|
||||
``_session_model_overrides``.
|
||||
|
||||
These tests cover the catalog-fallback path: when ``fetch_api_models`` returns
|
||||
``None``, the validator must consult ``provider_model_ids()`` for the provider
|
||||
(populated from ``_PROVIDER_MODELS``) rather than rejecting outright.
|
||||
"""
|
||||
|
||||
from unittest.mock import patch
|
||||
|
||||
from hermes_cli.models import validate_requested_model
|
||||
|
||||
|
||||
_UNREACHABLE_PROBE = {
|
||||
"models": None,
|
||||
"probed_url": "https://opencode.ai/zen/go/v1/models",
|
||||
"resolved_base_url": "https://opencode.ai/zen/go/v1",
|
||||
"suggested_base_url": None,
|
||||
"used_fallback": False,
|
||||
}
|
||||
|
||||
|
||||
def _patched(func):
    """Decorate a test so it runs with the ``/models`` endpoint unreachable.

    For the duration of every call, ``hermes_cli.models.fetch_api_models`` is
    patched to return ``None`` and ``hermes_cli.models.probe_api_models`` to
    return ``_UNREACHABLE_PROBE``, so a passing test proves the
    catalog-fallback path was used.

    Args:
        func: The test function to wrap.

    Returns:
        A wrapper that forwards all arguments to ``func`` and returns its
        result.  ``functools.wraps`` carries over the name, qualname, module
        and docstring (the previous hand-rolled version copied only
        ``__name__``) and exposes the original via ``__wrapped__``.
    """
    # Function-scope import: the module's import block does not bring in functools.
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        with patch("hermes_cli.models.fetch_api_models", return_value=None), \
             patch("hermes_cli.models.probe_api_models", return_value=_UNREACHABLE_PROBE):
            return func(*args, **kwargs)

    return wrapper
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# opencode-go: curated catalog in _PROVIDER_MODELS
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_known_model_accepted():
    """A model in the opencode-go curated catalog is accepted, persisted and
    recognized with no message, even though /models is unreachable."""
    outcome = validate_requested_model("kimi-k2.6", "opencode-go")
    for flag in ("accepted", "persist", "recognized"):
        assert outcome[flag] is True
    assert outcome["message"] is None
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_known_model_case_insensitive():
    """An upper-cased catalog model name is still accepted and recognized."""
    outcome = validate_requested_model("KIMI-K2.6", "opencode-go")
    for flag in ("accepted", "recognized"):
        assert outcome[flag] is True
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_typo_auto_corrected():
    """A near-miss spelling (>= 0.9 similarity) is rewritten to the catalog
    entry and reported through ``corrected_model``."""
    # 'kimi-k2.55' vs 'kimi-k2.5' ratio ≈ 0.95 — within the 0.9 cutoff.
    outcome = validate_requested_model("kimi-k2.55", "opencode-go")
    for flag in ("accepted", "recognized"):
        assert outcome[flag] is True
    assert outcome.get("corrected_model") == "kimi-k2.5"
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_unknown_model_accepted_with_suggestion():
    """An unknown model with a medium-similarity match (>= 0.5 but < 0.9) is
    accepted with ``recognized=False`` and a message naming the model and the
    curated catalog.

    Key invariant: the gateway MUST be able to persist this override, so
    ``accepted`` and ``persist`` are both True.
    """
    # 'kimi-k3-preview' vs 'kimi-k2.6' — similar enough to suggest, not to auto-correct.
    outcome = validate_requested_model("kimi-k3-preview", "opencode-go")
    for flag in ("accepted", "persist"):
        assert outcome[flag] is True
    assert outcome["recognized"] is False
    for fragment in ("kimi-k3-preview", "curated catalog"):
        assert fragment in outcome["message"]
|
||||
|
||||
|
||||
@_patched
def test_opencode_go_totally_unknown_model_still_accepted():
    """A model with no resemblance to any catalog entry is still accepted —
    without a suggestion line — so users can try models that have not reached
    the curated list yet."""
    outcome = validate_requested_model("some-brand-new-model", "opencode-go")
    for flag in ("accepted", "persist"):
        assert outcome[flag] is True
    assert outcome["recognized"] is False
    # No suggestion text (no close matches)
    assert "Similar models" not in outcome["message"]
    # Any message containing "opencode go" also contains "opencode", so this
    # single membership check is the whole condition.
    assert "opencode" in outcome["message"].lower()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# opencode-zen: same pattern as opencode-go
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@_patched
def test_opencode_zen_known_model_accepted():
    """opencode-zen is also served from _PROVIDER_MODELS; kimi-k2 is expected
    to be in its catalog and therefore accepted and recognized."""
    outcome = validate_requested_model("kimi-k2", "opencode-zen")
    for flag in ("accepted", "recognized"):
        assert outcome[flag] is True
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unknown provider with no catalog: soft-accept (honors the comment's intent)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
@_patched
def test_provider_without_catalog_accepts_with_warning():
    """With no _PROVIDER_MODELS entry and /models unreachable, the model is
    accepted with a 'Note:' warning instead of being rejected.

    This mirrors the in-code comment: 'Accept and persist, but warn so typos
    don't silently break things.'
    """
    # A made-up provider name that cannot resolve to any catalog.
    outcome = validate_requested_model("some-model", "provider-that-does-not-exist")
    for flag in ("accepted", "persist"):
        assert outcome[flag] is True
    assert outcome["recognized"] is False
    assert "Note:" in outcome["message"]
|
||||
|
|
@ -1412,3 +1412,90 @@ def test_named_custom_runtime_no_model_when_absent(monkeypatch):
|
|||
|
||||
resolved = rp.resolve_runtime_provider(requested="my-server")
|
||||
assert "model" not in resolved
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# GHSA-76xc-57q6-vm5m — Ollama URL substring leak
|
||||
#
|
||||
# Same bug class as the previously-fixed GHSA-xf8p-v2cg-h7h5 (OpenRouter).
|
||||
# _resolve_openrouter_runtime's custom-endpoint branch selects OLLAMA_API_KEY
|
||||
# when the base_url "looks like" ollama.com. Previous implementation used
|
||||
# raw substring match; a custom base_url whose PATH or look-alike host
|
||||
# merely contained "ollama.com" leaked OLLAMA_API_KEY to that endpoint.
|
||||
# Fix: use base_url_host_matches (same helper as the OpenRouter sweep).
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestOllamaUrlSubstringLeak:
    """Call-site regression tests for the fix in _resolve_openrouter_runtime."""

    def _make_cfg(self, base_url):
        return {"base_url": base_url, "api_key": "", "provider": "custom"}

    def _resolve_custom(self, monkeypatch, base_url, env):
        """Resolve the ``custom`` provider against *base_url*.

        Sets every variable in *env*, forces provider resolution to
        ``custom``, serves a model config pointing at *base_url*, and
        disables both credential-pool lookups so that only the environment
        can supply the API key.
        """
        for name, value in env.items():
            monkeypatch.setenv(name, value)
        monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "custom")
        monkeypatch.setattr(rp, "_get_model_config", lambda: self._make_cfg(base_url))
        monkeypatch.setattr(rp, "load_pool", lambda provider: None)
        monkeypatch.setattr(rp, "_try_resolve_from_custom_pool", lambda *a, **k: None)
        return rp.resolve_runtime_provider(requested="custom")

    def test_ollama_key_not_leaked_to_path_injection(self, monkeypatch):
        """http://127.0.0.1:9000/ollama.com/v1 — attacker endpoint with
        ollama.com in PATH. Must resolve to OPENAI_API_KEY, not OLLAMA_API_KEY."""
        resolved = self._resolve_custom(
            monkeypatch,
            "http://127.0.0.1:9000/ollama.com/v1",
            {
                "OPENAI_API_KEY": "oa-secret",
                "OPENROUTER_API_KEY": "or-secret",
                "OLLAMA_API_KEY": "ol-SECRET-should-not-leak",
            },
        )

        assert "ol-SECRET" not in resolved["api_key"], (
            "OLLAMA_API_KEY must not be sent to an endpoint whose "
            "hostname is not ollama.com (GHSA-76xc-57q6-vm5m)"
        )
        assert resolved["api_key"] == "oa-secret"

    def test_ollama_key_not_leaked_to_lookalike_host(self, monkeypatch):
        """ollama.com.attacker.test — look-alike host. OLLAMA_API_KEY
        must not be sent."""
        resolved = self._resolve_custom(
            monkeypatch,
            "http://ollama.com.attacker.test:9000/v1",
            {
                "OPENAI_API_KEY": "oa-secret",
                "OLLAMA_API_KEY": "ol-SECRET-should-not-leak",
            },
        )

        assert "ol-SECRET" not in resolved["api_key"]
        assert resolved["api_key"] == "oa-secret"

    def test_ollama_key_sent_to_genuine_ollama_com(self, monkeypatch):
        """https://ollama.com/v1 — legit Ollama Cloud. OLLAMA_API_KEY
        should be used."""
        resolved = self._resolve_custom(
            monkeypatch,
            "https://ollama.com/v1",
            {
                "OPENAI_API_KEY": "oa-secret",
                "OLLAMA_API_KEY": "ol-legit-key",
            },
        )

        assert resolved["api_key"] == "ol-legit-key"

    def test_ollama_key_sent_to_ollama_subdomain(self, monkeypatch):
        """https://api.ollama.com/v1 — legit subdomain."""
        resolved = self._resolve_custom(
            monkeypatch,
            "https://api.ollama.com/v1",
            {
                "OPENAI_API_KEY": "oa-secret",
                "OLLAMA_API_KEY": "ol-legit-key",
            },
        )

        assert resolved["api_key"] == "ol-legit-key"
|
||||
|
|
|
|||
148
tests/hermes_cli/test_web_server_host_header.py
Normal file
148
tests/hermes_cli/test_web_server_host_header.py
Normal file
|
|
@ -0,0 +1,148 @@
|
|||
"""Tests for GHSA-ppp5-vxwm-4cf7 — Host-header validation.
|
||||
|
||||
DNS rebinding defence: a victim browser that has the dashboard open
|
||||
could be tricked into fetching from an attacker-controlled hostname
|
||||
that TTL-flips to 127.0.0.1. Same-origin / CORS checks won't help —
|
||||
the browser now treats the attacker origin as same-origin. Validating
|
||||
the Host header at the application layer rejects the attack.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# This file lives at <repo>/tests/hermes_cli/, so relative to it:
#   parents[0] = tests/hermes_cli, parents[1] = tests, parents[2] = repo root.
# The repo root is what must be importable.  Putting tests/ first on sys.path
# instead would expose the tests/hermes_cli directory under the name
# ``hermes_cli``, ahead of the real package.
_repo = str(Path(__file__).resolve().parents[2])
if _repo not in sys.path:
    sys.path.insert(0, _repo)
|
||||
|
||||
|
||||
class TestHostHeaderValidator:
    """Direct unit tests of the ``_is_accepted_host`` helper — cheaper and
    more thorough than spinning up the full FastAPI app."""

    def test_loopback_bind_accepts_loopback_names(self):
        """Every loopback bind accepts every loopback alias, with or without a port."""
        from hermes_cli.web_server import _is_accepted_host

        loopback_binds = ("127.0.0.1", "localhost", "::1")
        loopback_hosts = (
            "127.0.0.1", "127.0.0.1:9119",
            "localhost", "localhost:9119",
            "[::1]", "[::1]:9119",
        )
        for bound in loopback_binds:
            for host_header in loopback_hosts:
                assert _is_accepted_host(host_header, bound), (
                    f"bound={bound} must accept host={host_header}"
                )

    def test_loopback_bind_rejects_attacker_hostnames(self):
        """The core rebinding defence: attacker-controlled hosts that
        TTL-flip to 127.0.0.1 must be rejected."""
        from hermes_cli.web_server import _is_accepted_host

        hostile_hosts = (
            "evil.example",
            "evil.example:9119",
            "rebind.attacker.test:80",
            "localhost.attacker.test",  # subdomain trick
            "127.0.0.1.evil.test",  # lookalike IP prefix
            "",  # missing Host
        )
        for bound in ("127.0.0.1", "localhost"):
            for attacker in hostile_hosts:
                assert not _is_accepted_host(attacker, bound), (
                    f"bound={bound} must reject attacker host={attacker!r}"
                )

    def test_zero_zero_bind_accepts_anything(self):
        """0.0.0.0 means the operator explicitly opted into all interfaces
        (requires --insecure). No Host-layer defence is possible — rely
        on operator network controls."""
        from hermes_cli.web_server import _is_accepted_host

        wildcard = "0.0.0.0"
        for host in ("10.0.0.5", "evil.example", "my-server.corp.net"):
            assert _is_accepted_host(host, wildcard)
            assert _is_accepted_host(host + ":9119", wildcard)

    def test_explicit_non_loopback_bind_requires_exact_match(self):
        """If the operator bound to a specific non-loopback hostname,
        the Host header must match exactly."""
        from hermes_cli.web_server import _is_accepted_host

        bound_name = "my-server.corp.net"
        # Same name, with or without a port — accept
        for matching in ("my-server.corp.net", "my-server.corp.net:9119"):
            assert _is_accepted_host(matching, bound_name)
        # Different host — reject
        assert not _is_accepted_host("evil.example", bound_name)
        # Loopback — reject (we bound to a specific non-loopback name)
        assert not _is_accepted_host("localhost", bound_name)

    def test_case_insensitive_comparison(self):
        """Host headers are case-insensitive per RFC — accept variations."""
        from hermes_cli.web_server import _is_accepted_host

        for mixed_case in ("LOCALHOST", "LocalHost:9119"):
            assert _is_accepted_host(mixed_case, "127.0.0.1")
|
||||
|
||||
|
||||
class TestHostHeaderMiddleware:
    """End-to-end test via the FastAPI app — verify the middleware
    rejects bad Host headers with 400."""

    @staticmethod
    def _clear_bound_host(app):
        """Drop ``app.state.bound_host`` if present so other tests don't inherit it."""
        if hasattr(app.state, "bound_host"):
            del app.state.bound_host

    def test_rebinding_request_rejected(self):
        """A non-loopback Host header against a loopback bind yields 400."""
        from fastapi.testclient import TestClient
        from hermes_cli.web_server import app

        # Simulate start_server having set the bound_host
        app.state.bound_host = "127.0.0.1"
        try:
            client = TestClient(app)
            # Override the TestClient's default Host with an explicit
            # attacker-controlled name. It is NOT a loopback alias, so the
            # middleware must reject it.
            resp = client.get(
                "/api/status",
                headers={"Host": "evil.example"},
            )
            assert resp.status_code == 400
            assert "Invalid Host header" in resp.json()["detail"]
        finally:
            # Clean up so other tests don't inherit the bound_host
            self._clear_bound_host(app)

    def test_legit_loopback_request_accepted(self):
        """A loopback Host header must not trigger the host-rejection 400."""
        from fastapi.testclient import TestClient
        from hermes_cli.web_server import app

        app.state.bound_host = "127.0.0.1"
        try:
            client = TestClient(app)
            # /api/status is in _PUBLIC_API_PATHS — passes auth — so the
            # only thing that can reject is the host header middleware
            resp = client.get(
                "/api/status",
                headers={"Host": "localhost:9119"},
            )
            # Either 200 (endpoint served) or some other non-400 —
            # just not the host-rejection 400
            assert resp.status_code != 400 or (
                "Invalid Host header" not in resp.json().get("detail", "")
            )
        finally:
            self._clear_bound_host(app)

    def test_no_bound_host_skips_validation(self):
        """If app.state.bound_host isn't set (e.g. running under test
        infra without calling start_server), middleware must pass through
        rather than crash."""
        from fastapi.testclient import TestClient
        from hermes_cli.web_server import app

        # Make sure bound_host isn't set
        self._clear_bound_host(app)

        client = TestClient(app)
        resp = client.get("/api/status")
        # Should get through to the status endpoint, not a 400
        assert resp.status_code != 400
|
||||
|
|
@ -136,13 +136,15 @@ class TestXiaomiModelCatalog:
|
|||
assert PROVIDER_TO_MODELS_DEV["xiaomi"] == "xiaomi"
|
||||
|
||||
def test_static_model_list_fallback(self):
|
||||
"""Static _PROVIDER_MODELS fallback must exist for model picker."""
|
||||
"""Static _PROVIDER_MODELS fallback must exist for model picker.
|
||||
|
||||
We only assert the provider key is present — the specific model
|
||||
names are data that changes with upstream releases and doesn't
|
||||
belong in tests.
|
||||
"""
|
||||
from hermes_cli.models import _PROVIDER_MODELS
|
||||
assert "xiaomi" in _PROVIDER_MODELS
|
||||
models = _PROVIDER_MODELS["xiaomi"]
|
||||
assert "mimo-v2-pro" in models
|
||||
assert "mimo-v2-omni" in models
|
||||
assert "mimo-v2-flash" in models
|
||||
assert len(_PROVIDER_MODELS["xiaomi"]) >= 1
|
||||
|
||||
def test_list_agentic_models_mock(self, monkeypatch):
|
||||
"""When models.dev returns Xiaomi data, list_agentic_models should return models."""
|
||||
|
|
|
|||
|
|
@ -118,6 +118,86 @@ class TestOpenAIWireFormatOnCustomProvider:
|
|||
assert agent._anthropic_prompt_cache_policy() == (False, False)
|
||||
|
||||
|
||||
class TestQwenAlibabaFamily:
    """Qwen on OpenCode/OpenCode-Go/Alibaba — needs cache_control even on OpenAI-wire.

    Upstream pi-mono #3392 / #3393 documented that these providers serve
    zero cache hits without Anthropic-style markers. Regression reported
    by community user (Qwen3.6 on opencode-go burning through
    subscription with no cache). Envelope layout, not native, because the
    wire format is OpenAI chat.completions.
    """

    _OPENCODE_URL = "https://opencode.ai/v1"

    @staticmethod
    def _policy(provider, base_url, model):
        """Build a chat_completions agent and return its cache policy tuple.

        The tuple is whatever ``_anthropic_prompt_cache_policy()`` returns;
        the tests below unpack it as ``(should_cache, native_layout)``.
        """
        agent = _make_agent(
            provider=provider,
            base_url=base_url,
            api_mode="chat_completions",
            model=model,
        )
        return agent._anthropic_prompt_cache_policy()

    def test_qwen_on_opencode_go_caches_with_envelope_layout(self):
        should, native = self._policy(
            "opencode-go", self._OPENCODE_URL, "qwen3.6-plus"
        )
        assert should is True, "Qwen on opencode-go must cache"
        assert native is False, "opencode-go is OpenAI-wire; envelope layout"

    def test_qwen35_plus_on_opencode_go(self):
        policy = self._policy("opencode-go", self._OPENCODE_URL, "qwen3.5-plus")
        assert policy == (True, False)

    def test_qwen_on_opencode_zen_caches(self):
        policy = self._policy("opencode", self._OPENCODE_URL, "qwen3-coder-plus")
        assert policy == (True, False)

    def test_qwen_on_direct_alibaba_caches(self):
        policy = self._policy(
            "alibaba",
            "https://dashscope.aliyuncs.com/compatible-mode/v1",
            "qwen3-coder",
        )
        assert policy == (True, False)

    def test_non_qwen_on_opencode_go_does_not_cache(self):
        # GLM / Kimi on opencode-go don't need markers (they have automatic
        # server-side caching or none at all).
        policy = self._policy("opencode-go", self._OPENCODE_URL, "glm-5")
        assert policy == (False, False)

    def test_kimi_on_opencode_go_does_not_cache(self):
        policy = self._policy("opencode-go", self._OPENCODE_URL, "kimi-k2.5")
        assert policy == (False, False)

    def test_qwen_on_openrouter_not_affected(self):
        # Qwen via OpenRouter falls through — OpenRouter has its own
        # upstream caching arrangement for Qwen (provider-dependent).
        policy = self._policy(
            "openrouter", "https://openrouter.ai/api/v1", "qwen/qwen3-coder"
        )
        assert policy == (False, False)
|
||||
|
||||
|
||||
class TestExplicitOverrides:
|
||||
"""Policy accepts keyword overrides for switch_model / fallback activation."""
|
||||
|
||||
|
|
|
|||
|
|
@ -67,6 +67,14 @@ def test_get_proxy_from_env_ignores_blank_values(monkeypatch):
|
|||
assert _get_proxy_from_env() == "http://real-proxy:8080"
|
||||
|
||||
|
||||
def test_get_proxy_from_env_normalizes_socks_alias(monkeypatch):
    """A ``socks://`` proxy URL in ALL_PROXY is reported as ``socks5://``."""
    # Clear every proxy variable (both cases) so only ALL_PROXY below is set.
    for key in ("HTTPS_PROXY", "HTTP_PROXY", "ALL_PROXY",
                "https_proxy", "http_proxy", "all_proxy"):
        monkeypatch.delenv(key, raising=False)
    monkeypatch.setenv("ALL_PROXY", "socks://127.0.0.1:1080/")
    assert _get_proxy_from_env() == "socks5://127.0.0.1:1080/"
|
||||
|
||||
|
||||
@patch("run_agent.OpenAI")
|
||||
def test_create_openai_client_routes_via_proxy_when_env_set(mock_openai, monkeypatch):
|
||||
"""With HTTPS_PROXY set, the custom httpx.Client must mount an HTTPProxy pool.
|
||||
|
|
|
|||
|
|
@ -33,6 +33,11 @@ class TestInterruptPropagationToChild(unittest.TestCase):
|
|||
agent._active_children = []
|
||||
agent._active_children_lock = threading.Lock()
|
||||
agent.quiet_mode = True
|
||||
# Provider/model/base_url are read by stale-timeout resolution paths;
|
||||
# the specific values don't matter for interrupt tests.
|
||||
agent.provider = "openrouter"
|
||||
agent.model = "test/model"
|
||||
agent._base_url = "http://localhost:1234"
|
||||
return agent
|
||||
|
||||
def test_parent_interrupt_sets_child_flag(self):
|
||||
|
|
|
|||
|
|
@ -952,6 +952,84 @@ class TestBuildApiKwargs:
|
|||
|
||||
assert "temperature" not in kwargs
|
||||
|
||||
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
    """Kimi endpoint should send max_tokens=32000 and reasoning_effort as
    top-level params, matching Kimi CLI's default behavior."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    # Keep the lowercased copy in sync with base_url.
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    messages = [{"role": "user", "content": "hi"}]

    kwargs = agent._build_api_kwargs(messages)

    assert kwargs["max_tokens"] == 32000
    assert kwargs["reasoning_effort"] == "medium"
|
||||
|
||||
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
    """reasoning_effort should reflect reasoning_config.effort when set."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    # Keep the lowercased copy in sync with base_url.
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    agent.reasoning_config = {"enabled": True, "effort": "high"}
    messages = [{"role": "user", "content": "hi"}]

    kwargs = agent._build_api_kwargs(messages)

    assert kwargs["reasoning_effort"] == "high"
|
||||
|
||||
def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
    """Kimi endpoint should send extra_body.thinking={"type":"enabled"}
    to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    # Keep the lowercased copy in sync with base_url.
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    messages = [{"role": "user", "content": "hi"}]

    kwargs = agent._build_api_kwargs(messages)

    assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
|
||||
def test_kimi_coding_endpoint_disables_thinking(self, agent):
    """When reasoning_config.enabled=False, thinking should be disabled
    and reasoning_effort should be omitted entirely — mirroring Kimi
    CLI's with_thinking("off") which maps to reasoning_effort=None."""
    agent.base_url = "https://api.kimi.com/coding/v1"
    # Keep the lowercased copy in sync with base_url.
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-for-coding"
    agent.reasoning_config = {"enabled": False}
    messages = [{"role": "user", "content": "hi"}]

    kwargs = agent._build_api_kwargs(messages)

    assert kwargs["extra_body"]["thinking"] == {"type": "disabled"}
    assert "reasoning_effort" not in kwargs
|
||||
|
||||
def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
    """api.moonshot.ai should get the same Kimi-compatible params."""
    agent.base_url = "https://api.moonshot.ai/v1"
    # Keep the lowercased copy in sync with base_url.
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-k2.5"
    messages = [{"role": "user", "content": "hi"}]

    kwargs = agent._build_api_kwargs(messages)

    assert kwargs["max_tokens"] == 32000
    assert kwargs["reasoning_effort"] == "medium"
    assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
|
||||
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
    """api.moonshot.cn (China endpoint) should get the same params."""
    agent.base_url = "https://api.moonshot.cn/v1"
    # Keep the lowercased copy in sync with base_url.
    agent._base_url_lower = agent.base_url.lower()
    agent.model = "kimi-k2.5"
    messages = [{"role": "user", "content": "hi"}]

    kwargs = agent._build_api_kwargs(messages)

    assert kwargs["max_tokens"] == 32000
    assert kwargs["reasoning_effort"] == "medium"
    assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
|
||||
|
||||
def test_provider_preferences_injected(self, agent):
|
||||
agent.base_url = "https://openrouter.ai/api/v1"
|
||||
agent.providers_allowed = ["Anthropic"]
|
||||
|
|
|
|||
203
tests/test_account_usage.py
Normal file
203
tests/test_account_usage.py
Normal file
|
|
@ -0,0 +1,203 @@
|
|||
from datetime import datetime, timezone
|
||||
|
||||
from agent.account_usage import (
|
||||
AccountUsageSnapshot,
|
||||
AccountUsageWindow,
|
||||
fetch_account_usage,
|
||||
render_account_usage_lines,
|
||||
)
|
||||
|
||||
|
||||
class _Response:
|
||||
def __init__(self, payload, status_code=200):
|
||||
self._payload = payload
|
||||
self.status_code = status_code
|
||||
|
||||
def raise_for_status(self):
|
||||
if self.status_code >= 400:
|
||||
raise RuntimeError(f"HTTP {self.status_code}")
|
||||
|
||||
def json(self):
|
||||
return self._payload
|
||||
|
||||
|
||||
class _Client:
|
||||
def __init__(self, payload):
|
||||
self._payload = payload
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def get(self, url, headers=None):
|
||||
return _Response(self._payload)
|
||||
|
||||
|
||||
class _RoutingClient:
|
||||
def __init__(self, payloads):
|
||||
self._payloads = payloads
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __exit__(self, exc_type, exc, tb):
|
||||
return False
|
||||
|
||||
def get(self, url, headers=None):
|
||||
return _Response(self._payloads[url])
|
||||
|
||||
|
||||
def test_fetch_account_usage_codex(monkeypatch):
    """Codex usage payload is turned into a snapshot with plan, windows and credits."""
    # Canned body served by the fake HTTP client.
    usage_payload = {
        "plan_type": "pro",
        "rate_limit": {
            "primary_window": {
                "used_percent": 15,
                "reset_at": 1_900_000_000,
                "limit_window_seconds": 18000,
            },
            "secondary_window": {
                "used_percent": 40,
                "reset_at": 1_900_500_000,
                "limit_window_seconds": 604800,
            },
        },
        "credits": {"has_credits": True, "balance": 12.5},
    }

    monkeypatch.setattr(
        "agent.account_usage.resolve_codex_runtime_credentials",
        lambda refresh_if_expiring=True: {
            "provider": "openai-codex",
            "base_url": "https://chatgpt.com/backend-api/codex",
            "api_key": "access-token",
        },
    )
    monkeypatch.setattr(
        "agent.account_usage._read_codex_tokens",
        lambda: {"tokens": {"account_id": "acct_123"}},
    )
    monkeypatch.setattr(
        "agent.account_usage.httpx.Client",
        lambda timeout=15.0: _Client(usage_payload),
    )

    snapshot = fetch_account_usage("openai-codex")

    assert snapshot is not None
    assert snapshot.plan == "Pro"
    assert len(snapshot.windows) == 2
    assert snapshot.windows[0].label == "Session"
    assert snapshot.windows[0].used_percent == 15.0
    assert snapshot.windows[0].reset_at == datetime.fromtimestamp(1_900_000_000, tz=timezone.utc)
    assert "Credits balance: $12.50" in snapshot.details
|
||||
|
||||
|
||||
def test_render_account_usage_lines_includes_reset_and_provider():
    """Rendered lines carry the header, provider/plan, window usage and details."""
    # One timestamp for both fields keeps the snapshot internally consistent.
    now = datetime.now(timezone.utc)
    snapshot = AccountUsageSnapshot(
        provider="openai-codex",
        source="usage_api",
        fetched_at=now,
        plan="Pro",
        windows=(
            AccountUsageWindow(
                label="Session",
                used_percent=25,
                reset_at=now,
            ),
        ),
        details=("Credits balance: $9.99",),
    )

    lines = render_account_usage_lines(snapshot)

    assert lines[0] == "📈 Account limits"
    assert "openai-codex (Pro)" in lines[1]
    assert "Session: 75% remaining (25% used)" in lines[2]
    assert "Credits balance: $9.99" in lines[3]
|
||||
|
||||
|
||||
def test_fetch_account_usage_openrouter_uses_limit_remaining_and_ignores_deprecated_rate_limit(monkeypatch):
    """Quota window comes from limit/limit_remaining; ``rate_limit`` is never rendered."""
    # Canned bodies keyed by the URL the fake client is asked for.
    payloads = {
        "https://openrouter.ai/api/v1/credits": {
            "data": {"total_credits": 300.0, "total_usage": 10.92}
        },
        "https://openrouter.ai/api/v1/key": {
            "data": {
                "limit": 100.0,
                "limit_remaining": 70.0,
                "limit_reset": "monthly",
                "usage": 12.5,
                "usage_daily": 0.5,
                "usage_weekly": 2.0,
                "usage_monthly": 8.0,
                "rate_limit": {"requests": -1, "interval": "10s"},
            }
        },
    }

    monkeypatch.setattr(
        "agent.account_usage.resolve_runtime_provider",
        lambda requested, explicit_base_url=None, explicit_api_key=None: {
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-test",
        },
    )
    monkeypatch.setattr(
        "agent.account_usage.httpx.Client",
        lambda timeout=10.0: _RoutingClient(payloads),
    )

    snapshot = fetch_account_usage("openrouter")

    assert snapshot is not None
    assert snapshot.windows == (
        AccountUsageWindow(
            label="API key quota",
            used_percent=30.0,
            detail="$70.00 of $100.00 remaining • resets monthly",
        ),
    )
    assert "Credits balance: $289.08" in snapshot.details
    assert "API key usage: $12.50 total • $0.50 today • $2.00 this week • $8.00 this month" in snapshot.details
    assert all("-1 requests / 10s" not in line for line in render_account_usage_lines(snapshot))
|
||||
|
||||
|
||||
def test_fetch_account_usage_openrouter_omits_quota_window_when_key_has_no_limit(monkeypatch):
    """A key with ``limit: None`` yields no quota window but still reports usage."""
    # Canned bodies keyed by the URL the fake client is asked for.
    payloads = {
        "https://openrouter.ai/api/v1/credits": {
            "data": {"total_credits": 100.0, "total_usage": 25.5}
        },
        "https://openrouter.ai/api/v1/key": {
            "data": {
                "limit": None,
                "limit_remaining": None,
                "usage": 25.5,
                "usage_daily": 1.25,
                "usage_weekly": 4.5,
                "usage_monthly": 18.0,
            }
        },
    }

    monkeypatch.setattr(
        "agent.account_usage.resolve_runtime_provider",
        lambda requested, explicit_base_url=None, explicit_api_key=None: {
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-test",
        },
    )
    monkeypatch.setattr(
        "agent.account_usage.httpx.Client",
        lambda timeout=10.0: _RoutingClient(payloads),
    )

    snapshot = fetch_account_usage("openrouter")

    assert snapshot is not None
    assert snapshot.windows == ()
    assert "Credits balance: $74.50" in snapshot.details
    assert "API key usage: $25.50 total • $1.25 today • $4.50 this week • $18.00 this month" in snapshot.details
|
||||
|
|
@ -106,3 +106,55 @@ class TestBaseUrlHostMatchesEdgeCases:
|
|||
|
||||
def test_trailing_dot_on_domain_stripped(self):
|
||||
assert base_url_host_matches("https://openrouter.ai/v1", "openrouter.ai.") is True
|
||||
|
||||
|
||||
class TestOllamaUrlHostCheck:
    """GHSA-76xc-57q6-vm5m — ollama.com was using a raw substring match for
    credential selection (same bug class as GHSA-xf8p-v2cg-h7h5 for OpenRouter).
    These tests lock in that the base_url_host_matches fix correctly rejects
    the same attack vectors for Ollama.
    """

    def test_ollama_com_path_injection_rejected(self):
        """http://127.0.0.1:9000/ollama.com/v1 — ollama.com appears in the
        path, not the host. Must not be treated as Ollama Cloud."""
        assert base_url_host_matches(
            "http://127.0.0.1:9000/ollama.com/v1", "ollama.com"
        ) is False

    def test_ollama_com_subdomain_lookalike_rejected(self):
        """ollama.com.attacker.test is a separate host, not ollama.com."""
        assert base_url_host_matches(
            "http://ollama.com.attacker.test:9000/v1", "ollama.com"
        ) is False

    def test_ollama_com_localtest_me_rejected(self):
        """ollama.com.localtest.me resolves to 127.0.0.1 via localtest.me,
        but it is a host under localtest.me, not under ollama.com."""
        assert base_url_host_matches(
            "http://ollama.com.localtest.me:9000/v1", "ollama.com"
        ) is False

    def test_ollama_ai_is_not_ollama_com(self):
        """Different TLD. ollama.ai is not ollama.com."""
        assert base_url_host_matches(
            "https://ollama.ai/v1", "ollama.com"
        ) is False

    def test_localhost_ollama_port_is_not_ollama_com(self):
        """http://localhost:11434/v1 is a local Ollama install, but its
        hostname is localhost, so OLLAMA_API_KEY (an ollama.com-only secret)
        must not be sent."""
        assert base_url_host_matches(
            "http://localhost:11434/v1", "ollama.com"
        ) is False

    def test_genuine_ollama_com_matches(self):
        """The real ollama.com host matches."""
        assert base_url_host_matches(
            "https://ollama.com/api/generate", "ollama.com"
        ) is True

    def test_ollama_com_subdomain_matches(self):
        """A genuine subdomain of ollama.com matches."""
        assert base_url_host_matches(
            "https://api.ollama.com/v1", "ollama.com"
        ) is True
|
||||
|
|
|
|||
|
|
@ -161,6 +161,8 @@ def test_transform_tool_result_runs_after_post_tool_call(monkeypatch):
|
|||
|
||||
def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_path):
|
||||
"""End-to-end: load a real plugin from HERMES_HOME and verify it rewrites results."""
|
||||
import yaml
|
||||
|
||||
hermes_home = Path(os.environ["HERMES_HOME"])
|
||||
plugins_dir = hermes_home / "plugins"
|
||||
plugin_dir = plugins_dir / "transform_result_canon"
|
||||
|
|
@ -172,7 +174,15 @@ def test_transform_tool_result_integration_with_real_plugin(monkeypatch, tmp_pat
|
|||
'lambda **kw: f\'CANON[{kw["tool_name"]}]\' + kw["result"])\n',
|
||||
encoding="utf-8",
|
||||
)
|
||||
# Plugins are opt-in — must be listed in plugins.enabled to load.
|
||||
cfg_path = hermes_home / "config.yaml"
|
||||
cfg_path.write_text(
|
||||
yaml.safe_dump({"plugins": {"enabled": ["transform_result_canon"]}}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
# Force a fresh plugin manager so the new config is picked up.
|
||||
plugins_mod._plugin_manager = plugins_mod.PluginManager()
|
||||
plugins_mod.discover_plugins()
|
||||
|
||||
out = _run_handle_function_call(
|
||||
|
|
|
|||
|
|
@ -58,10 +58,3 @@ class TestCamofoxConfigDefaults:
|
|||
|
||||
browser_cfg = DEFAULT_CONFIG["browser"]
|
||||
assert browser_cfg["camofox"]["managed_persistence"] is False
|
||||
|
||||
def test_config_version_matches_current_schema(self):
|
||||
from hermes_cli.config import DEFAULT_CONFIG
|
||||
|
||||
# The current schema version is tracked globally; unrelated default
|
||||
# options may bump it after browser defaults are added.
|
||||
assert DEFAULT_CONFIG["_config_version"] == 20
|
||||
|
|
|
|||
|
|
@ -172,28 +172,60 @@ class TestTerminalIntegration:
|
|||
assert blocked_var not in result
|
||||
assert "PATH" in result
|
||||
|
||||
def test_passthrough_allows_blocklisted_var(self):
|
||||
from tools.environments.local import _sanitize_subprocess_env, _HERMES_PROVIDER_ENV_BLOCKLIST
|
||||
def test_passthrough_cannot_override_provider_blocklist(self):
|
||||
"""GHSA-rhgp-j443-p4rf: register_env_passthrough must NOT accept
|
||||
Hermes provider credentials — that was the bypass where a skill
|
||||
could declare ANTHROPIC_TOKEN / OPENAI_API_KEY as passthrough and
|
||||
defeat the execute_code sandbox scrubbing."""
|
||||
from tools.environments.local import (
|
||||
_sanitize_subprocess_env,
|
||||
_HERMES_PROVIDER_ENV_BLOCKLIST,
|
||||
)
|
||||
|
||||
blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
|
||||
# Attempt to register — must be silently refused (logged warning).
|
||||
register_env_passthrough([blocked_var])
|
||||
|
||||
# is_env_passthrough must NOT report it as allowed
|
||||
assert not is_env_passthrough(blocked_var)
|
||||
|
||||
# Sanitizer still strips the var from subprocess env
|
||||
env = {blocked_var: "secret_value", "PATH": "/usr/bin"}
|
||||
result = _sanitize_subprocess_env(env)
|
||||
assert blocked_var in result
|
||||
assert result[blocked_var] == "secret_value"
|
||||
assert blocked_var not in result
|
||||
assert "PATH" in result
|
||||
|
||||
def test_make_run_env_passthrough(self, monkeypatch):
|
||||
from tools.environments.local import _make_run_env, _HERMES_PROVIDER_ENV_BLOCKLIST
|
||||
def test_make_run_env_blocklist_override_rejected(self):
|
||||
"""_make_run_env must NOT expose a blocklisted var to subprocess env
|
||||
even after a skill attempts to register it via passthrough."""
|
||||
import os
|
||||
from tools.environments.local import (
|
||||
_make_run_env,
|
||||
_HERMES_PROVIDER_ENV_BLOCKLIST,
|
||||
)
|
||||
|
||||
blocked_var = next(iter(_HERMES_PROVIDER_ENV_BLOCKLIST))
|
||||
monkeypatch.setenv(blocked_var, "secret_value")
|
||||
os.environ[blocked_var] = "secret_value"
|
||||
try:
|
||||
# Without passthrough — blocked
|
||||
result_before = _make_run_env({})
|
||||
assert blocked_var not in result_before
|
||||
|
||||
# Without passthrough — blocked
|
||||
result_before = _make_run_env({})
|
||||
assert blocked_var not in result_before
|
||||
# Skill tries to register it — must be refused, so still blocked
|
||||
register_env_passthrough([blocked_var])
|
||||
result_after = _make_run_env({})
|
||||
assert blocked_var not in result_after
|
||||
finally:
|
||||
os.environ.pop(blocked_var, None)
|
||||
|
||||
# With passthrough — allowed
|
||||
register_env_passthrough([blocked_var])
|
||||
result_after = _make_run_env({})
|
||||
assert blocked_var in result_after
|
||||
def test_non_hermes_api_key_still_registerable(self):
|
||||
"""Third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT
|
||||
Hermes provider credentials and must still pass through — skills
|
||||
that legitimately wrap third-party APIs must keep working."""
|
||||
# TENOR_API_KEY is a real example — used by the gif-search skill
|
||||
register_env_passthrough(["TENOR_API_KEY"])
|
||||
assert is_env_passthrough("TENOR_API_KEY")
|
||||
|
||||
# Arbitrary skill-specific var
|
||||
register_env_passthrough(["MY_SKILL_CUSTOM_CONFIG"])
|
||||
assert is_env_passthrough("MY_SKILL_CUSTOM_CONFIG")
|
||||
|
|
|
|||
|
|
@ -230,3 +230,102 @@ class TestEscapeDriftGuard:
|
|||
new, count, strategy, err = fuzzy_find_and_replace(content, old_string, new_string)
|
||||
assert err is None
|
||||
assert count == 1
|
||||
|
||||
|
||||
class TestFindClosestLines:
    """Tests for find_closest_lines, which returns a snippet of the lines
    closest to a search string (empty string when there is nothing to show)."""

    def setup_method(self):
        # Imported here so a missing symbol fails the tests, not collection.
        from tools.fuzzy_match import find_closest_lines
        self.find_closest_lines = find_closest_lines

    def test_finds_similar_line(self):
        content = "def foo():\n pass\ndef bar():\n return 1\n"
        result = self.find_closest_lines("def baz():", content)
        assert "def foo" in result or "def bar" in result

    def test_returns_empty_for_no_match(self):
        content = "completely different content here"
        result = self.find_closest_lines("xyzzy_no_match_possible_!!!", content)
        assert result == ""

    def test_returns_empty_for_empty_inputs(self):
        assert self.find_closest_lines("", "some content") == ""
        assert self.find_closest_lines("old string", "") == ""

    def test_includes_context_lines(self):
        content = "line1\nline2\ndef target():\n pass\nline5\n"
        result = self.find_closest_lines("def target():", content)
        assert "target" in result

    def test_includes_line_numbers(self):
        content = "line1\nline2\ndef foo():\n pass\n"
        result = self.find_closest_lines("def foo():", content)
        # Should include line numbers in format "N| content"
        assert "|" in result
|
||||
|
||||
|
||||
class TestFormatNoMatchHint:
    """Gating tests for format_no_match_hint — the shared helper that decides
    whether a 'Did you mean?' snippet should be appended to an error.
    """

    def setup_method(self):
        # Imported here so a missing symbol fails the tests, not collection.
        from tools.fuzzy_match import format_no_match_hint
        self.fmt = format_no_match_hint

    def test_fires_on_could_not_find_with_match(self):
        """Classic no-match: similar content exists → hint fires."""
        content = "def foo():\n pass\ndef bar():\n pass\n"
        result = self.fmt(
            "Could not find a match for old_string in the file",
            0, "def baz():", content,
        )
        assert "Did you mean" in result
        assert "foo" in result or "bar" in result

    def test_silent_on_ambiguous_match_error(self):
        """'Found N matches' is not a missing-match failure — no hint."""
        content = "aaa bbb aaa\n"
        result = self.fmt(
            "Found 2 matches for old_string. Provide more context to make it unique, or use replace_all=True.",
            0, "aaa", content,
        )
        assert result == ""

    def test_silent_on_escape_drift_error(self):
        """Escape-drift errors are intentional blocks — hint would mislead."""
        content = "x = 1\n"
        result = self.fmt(
            "Escape-drift detected: old_string and new_string contain the literal sequence '\\\\''...",
            0, "x = \\'1\\'", content,
        )
        assert result == ""

    def test_silent_on_identical_strings(self):
        """old_string == new_string — hint irrelevant."""
        result = self.fmt(
            "old_string and new_string are identical",
            0, "foo", "foo bar\n",
        )
        assert result == ""

    def test_silent_when_match_count_nonzero(self):
        """If match succeeded, we shouldn't be in the error path — defense in depth."""
        result = self.fmt(
            "Could not find a match for old_string in the file",
            1, "foo", "foo bar\n",
        )
        assert result == ""

    def test_silent_on_none_error(self):
        """No error at all — no hint."""
        result = self.fmt(None, 0, "foo", "bar\n")
        assert result == ""

    def test_silent_when_no_similar_content(self):
        """Even for a valid no-match error, skip hint when nothing similar exists."""
        result = self.fmt(
            "Could not find a match for old_string in the file",
            0, "totally_unique_xyzzy_qux", "abc\nxyz\n",
        )
        assert result == ""
|
||||
|
||||
|
|
|
|||
39
tests/tools/test_image_generation_env.py
Normal file
39
tests/tools/test_image_generation_env.py
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
"""FAL_KEY env var normalization (whitespace-only treated as unset)."""
|
||||
|
||||
|
||||
def test_fal_key_whitespace_is_unset(monkeypatch):
    """A whitespace-only FAL_KEY is treated as not configured."""
    # Whitespace-only FAL_KEY must NOT register as configured, and the managed
    # gateway fallback must be disabled for this assertion to be meaningful.
    monkeypatch.setenv("FAL_KEY", " ")

    from tools import image_generation_tool

    monkeypatch.setattr(
        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
    )

    assert image_generation_tool.check_fal_api_key() is False
|
||||
|
||||
|
||||
def test_fal_key_valid(monkeypatch):
    """A non-blank FAL_KEY counts as configured."""
    monkeypatch.setenv("FAL_KEY", "sk-test")

    from tools import image_generation_tool

    # Disable the managed-gateway fallback so only FAL_KEY decides the result.
    monkeypatch.setattr(
        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
    )

    assert image_generation_tool.check_fal_api_key() is True
|
||||
|
||||
|
||||
def test_fal_key_empty_is_unset(monkeypatch):
    """An empty FAL_KEY is treated as not configured."""
    monkeypatch.setenv("FAL_KEY", "")

    from tools import image_generation_tool

    # Disable the managed-gateway fallback so only FAL_KEY decides the result.
    monkeypatch.setattr(
        image_generation_tool, "_resolve_managed_fal_gateway", lambda: None
    )

    assert image_generation_tool.check_fal_api_key() is False
|
||||
162
tests/tools/test_local_shell_init.py
Normal file
162
tests/tools/test_local_shell_init.py
Normal file
|
|
@ -0,0 +1,162 @@
|
|||
"""Tests for terminal.shell_init_files / terminal.auto_source_bashrc.
|
||||
|
||||
A bash ``-l -c`` invocation does NOT source ``~/.bashrc``, so tools that
|
||||
register themselves there (nvm, asdf, pyenv) stay invisible to the
|
||||
environment snapshot built by ``LocalEnvironment.init_session``. These
|
||||
tests verify the config-driven prelude that fixes that.
|
||||
"""
|
||||
|
||||
import os
|
||||
from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
from tools.environments.local import (
|
||||
LocalEnvironment,
|
||||
_prepend_shell_init,
|
||||
_read_terminal_shell_init_config,
|
||||
_resolve_shell_init_files,
|
||||
)
|
||||
|
||||
|
||||
class TestResolveShellInitFiles:
    """``_resolve_shell_init_files`` driven by a stubbed config reader."""

    _CONFIG_READER = "tools.environments.local._read_terminal_shell_init_config"

    @classmethod
    def _resolve_with(cls, explicit_files, auto_source_bashrc):
        """Run the resolver while the config reader returns the given pair."""
        stubbed_config = (explicit_files, auto_source_bashrc)
        with patch(cls._CONFIG_READER, return_value=stubbed_config):
            return _resolve_shell_init_files()

    def test_auto_sources_bashrc_when_present(self, tmp_path, monkeypatch):
        rc_file = tmp_path / ".bashrc"
        rc_file.write_text('export MARKER=seen\n')
        monkeypatch.setenv("HOME", str(tmp_path))

        # Default config shape: empty explicit list, auto_source_bashrc on.
        found = self._resolve_with([], True)

        assert found == [str(rc_file)]

    def test_skips_bashrc_when_missing(self, tmp_path, monkeypatch):
        # HOME points at an empty directory, so there is no ~/.bashrc.
        monkeypatch.setenv("HOME", str(tmp_path))

        found = self._resolve_with([], True)

        assert found == []

    def test_auto_source_bashrc_off_suppresses_default(self, tmp_path, monkeypatch):
        rc_file = tmp_path / ".bashrc"
        rc_file.write_text('export MARKER=seen\n')
        monkeypatch.setenv("HOME", str(tmp_path))

        found = self._resolve_with([], False)

        assert found == []

    def test_explicit_list_wins_over_auto(self, tmp_path, monkeypatch):
        rc_file = tmp_path / ".bashrc"
        rc_file.write_text('export FROM_BASHRC=1\n')
        custom_file = tmp_path / "custom.sh"
        custom_file.write_text('export FROM_CUSTOM=1\n')
        monkeypatch.setenv("HOME", str(tmp_path))

        # auto_source_bashrc is left on; the explicit list must still win.
        found = self._resolve_with([str(custom_file)], True)

        assert found == [str(custom_file)]
        assert str(rc_file) not in found

    def test_expands_home_and_env_vars(self, tmp_path, monkeypatch):
        rc_dir = tmp_path / "rc"
        target = rc_dir / "custom.sh"
        target.parent.mkdir()
        target.write_text('export A=1\n')
        monkeypatch.setenv("HOME", str(tmp_path))
        monkeypatch.setenv("CUSTOM_RC_DIR", str(rc_dir))

        via_tilde = self._resolve_with(["~/rc/custom.sh"], False)
        via_env_var = self._resolve_with(["${CUSTOM_RC_DIR}/custom.sh"], False)

        assert via_tilde == [str(target)]
        assert via_env_var == [str(target)]

    def test_missing_explicit_files_are_skipped_silently(self, tmp_path, monkeypatch):
        monkeypatch.setenv("HOME", str(tmp_path))
        absent = str(tmp_path / "does-not-exist.sh")

        found = self._resolve_with([absent], False)

        assert found == []
|
||||
|
||||
|
||||
class TestPrependShellInit:
    """String-level checks on the prelude built by ``_prepend_shell_init``."""

    def test_empty_list_returns_command_unchanged(self):
        unchanged = _prepend_shell_init("echo hi", [])
        assert unchanged == "echo hi"

    def test_prepends_guarded_source_lines(self):
        script = _prepend_shell_init("echo hi", ["/tmp/a.sh", "/tmp/b.sh"])
        # The command itself, both file paths, and the two guards that keep a
        # missing/broken rc file from aborting the bootstrap must all appear.
        expected_fragments = (
            "echo hi",
            "/tmp/a.sh",
            "/tmp/b.sh",
            "|| true",
            "set +e",
        )
        for fragment in expected_fragments:
            assert fragment in script

    def test_escapes_single_quotes(self):
        script = _prepend_shell_init("echo hi", ["/tmp/o'malley.sh"])
        # An embedded single quote must come out as '\'' so the surrounding
        # single-quoted shell word stays intact.
        escaped_piece = "o'\\''malley"
        assert escaped_piece in script
|
||||
|
||||
|
||||
@pytest.mark.skipif(
    os.environ.get("CI") == "true" and not os.path.isfile("/bin/bash"),
    reason="Requires bash; CI sandbox may strip it.",
)
class TestSnapshotEndToEnd:
    """Run a real ``LocalEnvironment`` and check that exports from a
    configured init file are visible to executed commands."""

    def test_snapshot_picks_up_init_file_exports(self, tmp_path, monkeypatch):
        init_script = tmp_path / "custom-init.sh"
        script_lines = [
            'export HERMES_SHELL_INIT_PROBE="probe-ok"\n',
            'export PATH="/opt/shell-init-probe/bin:$PATH"\n',
        ]
        init_script.write_text("".join(script_lines))

        # Config reader stub: only the custom init file, bashrc auto-source off.
        config_stub = patch(
            "tools.environments.local._read_terminal_shell_init_config",
            return_value=([str(init_script)], False),
        )
        with config_stub:
            env = LocalEnvironment(cwd=str(tmp_path), timeout=15)
            try:
                result = env.execute(
                    'echo "PROBE=$HERMES_SHELL_INIT_PROBE"; echo "PATH=$PATH"'
                )
            finally:
                env.cleanup()

        captured = result.get("output", "")
        assert "PROBE=probe-ok" in captured
        assert "/opt/shell-init-probe/bin" in captured
|
||||
252
tests/tools/test_mcp_circuit_breaker.py
Normal file
252
tests/tools/test_mcp_circuit_breaker.py
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
"""Tests for MCP tool-handler circuit-breaker recovery.
|
||||
|
||||
The circuit breaker in ``tools/mcp_tool.py`` is intended to short-circuit
|
||||
calls to an MCP server that has failed ``_CIRCUIT_BREAKER_THRESHOLD``
|
||||
consecutive times, then *transition back to a usable state* once the
|
||||
server has had time to recover (or an explicit reconnect succeeds).
|
||||
|
||||
The original implementation only had two states — closed and open — with
|
||||
no mechanism to transition back to closed, so a tripped breaker stayed
|
||||
tripped for the lifetime of the process. These tests lock in the
|
||||
half-open / cooldown / reconnect-resets-breaker behavior that fixes
|
||||
that.
|
||||
"""
|
||||
import json
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
|
||||
pytest.importorskip("mcp.client.auth.oauth2")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def _install_stub_server(mcp_tool_module, name: str, call_tool_impl):
    """Register a ``MagicMock`` server under ``name`` in the module registry.

    ``call_tool_impl`` (an async function) is placed at ``session.call_tool``.
    Any error count or breaker-open timestamp already recorded for ``name``
    is dropped. Returns the stub server.
    """
    ready_flag = MagicMock()
    ready_flag.is_set.return_value = True

    fake_session = MagicMock()
    fake_session.call_tool = call_tool_impl

    stub = MagicMock()
    stub.name = name
    stub.session = fake_session
    stub._reconnect_event = MagicMock()
    stub._ready = ready_flag

    mcp_tool_module._servers[name] = stub
    mcp_tool_module._server_error_counts.pop(name, None)
    # The open-timestamp dict may not exist on every build of the module.
    if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
        mcp_tool_module._server_breaker_opened_at.pop(name, None)
    return stub
|
||||
|
||||
|
||||
def _cleanup(mcp_tool_module, name: str) -> None:
    """Remove every per-server entry recorded for ``name`` in the module."""
    for registry in (mcp_tool_module._servers, mcp_tool_module._server_error_counts):
        registry.pop(name, None)
    # The open-timestamp dict may not exist on every build of the module.
    if hasattr(mcp_tool_module, "_server_breaker_opened_at"):
        mcp_tool_module._server_breaker_opened_at.pop(name, None)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_circuit_breaker_half_opens_after_cooldown(monkeypatch, tmp_path):
    """A tripped breaker must let one real call through after its cooldown.

    Before the cooldown elapses the handler short-circuits with an
    "unreachable" error. Afterwards the next call reaches the session
    (half-open probe) and, because it succeeds, the error count drops to 0.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    from tools import mcp_tool
    from tools.mcp_tool import _make_tool_handler

    session_calls = []

    async def _call_tool_success(*a, **kw):
        session_calls.append(1)
        text_block = MagicMock()
        text_block.text = "ok"
        response = MagicMock()
        response.isError = False
        response.content = [text_block]
        response.structuredContent = None
        return response

    _install_stub_server(mcp_tool, "srv", _call_tool_success)
    mcp_tool._ensure_mcp_loop()

    try:
        # Open the breaker: error count at the threshold, opened "now".
        mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
        clock = {"now": 1000.0}
        monkeypatch.setattr(mcp_tool.time, "monotonic", lambda: clock["now"])
        # The open-timestamp dict only exists on builds that carry the fix;
        # without it this state cannot be tracked and the test is expected
        # to fail.
        if hasattr(mcp_tool, "_server_breaker_opened_at"):
            mcp_tool._server_breaker_opened_at["srv"] = clock["now"]
        cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)

        handler = _make_tool_handler("srv", "tool1", 10.0)

        # Still inside the cooldown window: no session call may happen.
        blocked = json.loads(handler({}))
        assert "error" in blocked, blocked
        assert "unreachable" in blocked["error"].lower()
        assert len(session_calls) == 0, (
            "breaker should short-circuit before cooldown elapses"
        )

        # Step the fake clock past the cooldown; the next call is the probe.
        clock["now"] += cooldown + 1.0

        probed = json.loads(handler({}))
        assert probed.get("result") == "ok", probed
        assert len(session_calls) == 1, "half-open probe should invoke session"

        # A successful probe closes the breaker again.
        assert mcp_tool._server_error_counts.get("srv", 0) == 0
    finally:
        _cleanup(mcp_tool, "srv")
|
||||
|
||||
|
||||
def test_circuit_breaker_reopens_on_probe_failure(monkeypatch, tmp_path):
    """A failing half-open probe must re-arm the cooldown.

    After the probe fails, an immediate second call has to short-circuit
    rather than reach the session again.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    from tools import mcp_tool
    from tools.mcp_tool import _make_tool_handler

    session_calls = []

    async def _call_tool_fails(*a, **kw):
        session_calls.append(1)
        raise RuntimeError("still broken")

    _install_stub_server(mcp_tool, "srv", _call_tool_fails)
    mcp_tool._ensure_mcp_loop()

    try:
        mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD
        clock = {"now": 1000.0}
        monkeypatch.setattr(mcp_tool.time, "monotonic", lambda: clock["now"])
        if hasattr(mcp_tool, "_server_breaker_opened_at"):
            mcp_tool._server_breaker_opened_at["srv"] = clock["now"]
        cooldown = getattr(mcp_tool, "_CIRCUIT_BREAKER_COOLDOWN_SEC", 60.0)

        handler = _make_tool_handler("srv", "tool1", 10.0)

        # Move past the cooldown so this call is the probe; it must fail.
        clock["now"] += cooldown + 1.0
        probe_reply = json.loads(handler({}))
        assert "error" in probe_reply
        assert len(session_calls) == 1, "probe should invoke session once"

        # With the cooldown re-armed, a follow-up call is blocked up front.
        follow_up = json.loads(handler({}))
        assert "unreachable" in follow_up.get("error", "").lower()
        assert len(session_calls) == 1, (
            "breaker should re-open and block further calls after probe failure"
        )
    finally:
        _cleanup(mcp_tool, "srv")
|
||||
|
||||
|
||||
def test_circuit_breaker_cleared_on_reconnect(monkeypatch, tmp_path):
    """When the auth-recovery path successfully reconnects the server,
    the breaker should be cleared so subsequent calls aren't gated on a
    stale failure count — even if the post-reconnect retry itself fails.

    This locks in the fix-#2 contract: a successful reconnect is
    sufficient evidence that the server is viable again. Under the old
    implementation, reset only happened on retry *success*, so a
    reconnect+retry-failure left the counter pinned above threshold
    forever.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    from tools import mcp_tool
    from tools.mcp_oauth_manager import get_manager, reset_manager_for_tests
    from mcp.client.auth import OAuthFlowError

    reset_manager_for_tests()

    async def _call_tool_unused(*a, **kw):  # pragma: no cover
        raise AssertionError("session.call_tool should not be reached in this test")

    _install_stub_server(mcp_tool, "srv", _call_tool_unused)

    # Everything that touches module state after the stub is installed runs
    # inside the try block, so a failure during setup still triggers
    # _cleanup() and cannot leak a tripped "srv" breaker into later tests.
    try:
        mcp_tool._ensure_mcp_loop()

        # Open the breaker well above threshold, with a recent open-time so
        # it would short-circuit everything without a reset.
        mcp_tool._server_error_counts["srv"] = mcp_tool._CIRCUIT_BREAKER_THRESHOLD + 2
        if hasattr(mcp_tool, "_server_breaker_opened_at"):
            import time as _time
            mcp_tool._server_breaker_opened_at["srv"] = _time.monotonic()

        # Force handle_401 to claim recovery succeeded.
        mgr = get_manager()

        async def _h401(name, token=None):
            return True

        monkeypatch.setattr(mgr, "handle_401", _h401)

        # Retry fails *after* the successful reconnect. Under the old
        # implementation this bumps an already-tripped counter even
        # higher. Under fix #2 the reset happens on successful
        # reconnect, and the post-retry bump only raises the fresh
        # count to 1 — still below threshold.
        def _retry_call():
            raise OAuthFlowError("still failing post-reconnect")

        result = mcp_tool._handle_auth_error_and_retry(
            "srv",
            OAuthFlowError("initial"),
            _retry_call,
            "tools/call test",
        )
        # The call as a whole still surfaces needs_reauth because the
        # retry itself didn't succeed, but the breaker state must
        # reflect the successful reconnect.
        assert result is not None
        parsed = json.loads(result)
        assert parsed.get("needs_reauth") is True, parsed

        # Only "below threshold" is asserted; the exact post-retry count is
        # left to the implementation.
        count = mcp_tool._server_error_counts.get("srv", 0)
        assert count < mcp_tool._CIRCUIT_BREAKER_THRESHOLD, (
            f"successful reconnect must reset the breaker below threshold; "
            f"got count={count}, threshold={mcp_tool._CIRCUIT_BREAKER_THRESHOLD}"
        )
    finally:
        _cleanup(mcp_tool, "srv")
|
||||
|
|
@ -173,6 +173,8 @@ def test_terminal_output_transform_does_not_change_approval_or_exit_code_meaning
|
|||
|
||||
|
||||
def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp_path):
|
||||
import yaml
|
||||
|
||||
hermes_home = Path(os.environ["HERMES_HOME"])
|
||||
plugins_dir = hermes_home / "plugins"
|
||||
plugin_dir = plugins_dir / "terminal_transform"
|
||||
|
|
@ -184,7 +186,15 @@ def test_terminal_output_transform_integration_with_real_plugin(monkeypatch, tmp
|
|||
'lambda **kw: "PLUGIN-HEAD\\n" + kw["output"] + "\\nPLUGIN-TAIL")\n',
|
||||
encoding="utf-8",
|
||||
)
|
||||
# Plugins are opt-in — must be listed in plugins.enabled to load.
|
||||
cfg_path = hermes_home / "config.yaml"
|
||||
cfg_path.write_text(
|
||||
yaml.safe_dump({"plugins": {"enabled": ["terminal_transform"]}}),
|
||||
encoding="utf-8",
|
||||
)
|
||||
|
||||
# Force a fresh plugin manager so the new config is picked up.
|
||||
plugins_mod._plugin_manager = plugins_mod.PluginManager()
|
||||
plugins_mod.discover_plugins()
|
||||
|
||||
long_output = "X" * 60000
|
||||
|
|
|
|||
198
tests/tools/test_tts_kittentts.py
Normal file
198
tests/tools/test_tts_kittentts.py
Normal file
|
|
@ -0,0 +1,198 @@
|
|||
"""Tests for the KittenTTS local provider in tools/tts_tool.py."""
|
||||
|
||||
import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import numpy as np
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clean_env(monkeypatch):
    """Remove ``HERMES_SESSION_PLATFORM`` from the environment for each test."""
    monkeypatch.delenv("HERMES_SESSION_PLATFORM", raising=False)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def clear_kittentts_cache():
    """Empty the module-level KittenTTS model cache before and after a test."""
    from tools import tts_tool

    model_cache = tts_tool._kittentts_model_cache
    model_cache.clear()
    yield
    # Re-read the attribute on teardown in case the test replaced the dict.
    tts_tool._kittentts_model_cache.clear()
|
||||
|
||||
|
||||
@pytest.fixture
def mock_kittentts_module():
    """Install stub ``kittentts`` and ``soundfile`` modules in ``sys.modules``.

    Yields ``(fake_model, fake_cls)``: the stub model instance and the stub
    ``KittenTTS`` class that returns it.
    """
    # generate() hands back 48000 float32 zeros (~2 s of silence at 24 kHz).
    model_stub = MagicMock()
    model_stub.generate.return_value = np.zeros(48000, dtype=np.float32)
    class_stub = MagicMock(return_value=model_stub)

    kittentts_stub = MagicMock()
    kittentts_stub.KittenTTS = class_stub

    def _fake_write(path, audio, samplerate):
        # Leave a real file behind so downstream path checks succeed.
        with open(path, "wb") as handle:
            handle.write(b"RIFF\x00\x00\x00\x00WAVEfmt fake")

    # soundfile is stubbed too: the real package isn't installed in the CI
    # venv, and _generate_kittentts imports it at call time.
    soundfile_stub = MagicMock()
    soundfile_stub.write = _fake_write

    injected = {"kittentts": kittentts_stub, "soundfile": soundfile_stub}
    with patch.dict("sys.modules", injected):
        yield model_stub, class_stub
|
||||
|
||||
|
||||
class TestGenerateKittenTts:
    """``_generate_kittentts`` run against the stubbed kittentts/soundfile modules."""

    def test_successful_wav_generation(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts

        model_stub, class_stub = mock_kittentts_module
        wav_file = tmp_path / "test.wav"

        returned = _generate_kittentts("Hello world", str(wav_file), {})

        assert returned == str(wav_file)
        assert wav_file.exists()
        class_stub.assert_called_once()
        model_stub.generate.assert_called_once()

    def test_config_passes_voice_speed_cleantext(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts

        model_stub, _ = mock_kittentts_module
        provider_settings = {
            "model": "KittenML/kitten-tts-mini-0.8",
            "voice": "Luna",
            "speed": 1.25,
            "clean_text": False,
        }
        _generate_kittentts(
            "Hi there", str(tmp_path / "out.wav"), {"kittentts": provider_settings}
        )

        passed = model_stub.generate.call_args.kwargs
        assert passed["voice"] == "Luna"
        assert passed["speed"] == 1.25
        assert passed["clean_text"] is False

    def test_default_model_and_voice(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import (
            DEFAULT_KITTENTTS_MODEL,
            DEFAULT_KITTENTTS_VOICE,
            _generate_kittentts,
        )

        model_stub, class_stub = mock_kittentts_module
        _generate_kittentts("Hi", str(tmp_path / "out.wav"), {})

        class_stub.assert_called_once_with(DEFAULT_KITTENTTS_MODEL)
        used_voice = model_stub.generate.call_args.kwargs["voice"]
        assert used_voice == DEFAULT_KITTENTTS_VOICE

    def test_model_is_cached_across_calls(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts

        _, class_stub = mock_kittentts_module
        for text, filename in (("One", "a.wav"), ("Two", "b.wav")):
            _generate_kittentts(text, str(tmp_path / filename), {})

        # Both calls use the same model name, so one instantiation only.
        assert class_stub.call_count == 1

    def test_different_models_are_cached_separately(self, tmp_path, mock_kittentts_module):
        from tools.tts_tool import _generate_kittentts

        _, class_stub = mock_kittentts_module
        requests = (
            ("A", "a.wav", "KittenML/kitten-tts-nano-0.8-int8"),
            ("B", "b.wav", "KittenML/kitten-tts-mini-0.8"),
        )
        for text, filename, model_name in requests:
            _generate_kittentts(
                text, str(tmp_path / filename),
                {"kittentts": {"model": model_name}},
            )

        assert class_stub.call_count == 2

    def test_non_wav_extension_triggers_ffmpeg_conversion(
        self, tmp_path, mock_kittentts_module, monkeypatch
    ):
        """Non-.wav output path causes WAV → target ffmpeg conversion."""
        from tools import tts_tool as _tt

        ffmpeg_invocations = []

        def fake_shutil_which(cmd):
            if cmd == "ffmpeg":
                return "/usr/bin/ffmpeg"
            return None

        def fake_run(cmd, check=False, timeout=None, **kw):
            ffmpeg_invocations.append(cmd)
            # Stand in for ffmpeg by creating the output file (last argument).
            import pathlib
            pathlib.Path(cmd[-1]).write_bytes(b"fake-mp3-data")
            return MagicMock(returncode=0)

        monkeypatch.setattr(_tt.shutil, "which", fake_shutil_which)
        monkeypatch.setattr(_tt.subprocess, "run", fake_run)

        mp3_path = str(tmp_path / "test.mp3")
        returned = _tt._generate_kittentts("Hi", mp3_path, {})

        assert returned == mp3_path
        assert len(ffmpeg_invocations) == 1
        assert ffmpeg_invocations[0][0] == "/usr/bin/ffmpeg"

    def test_missing_kittentts_raises_import_error(self, tmp_path, monkeypatch):
        """When kittentts package is not installed, _import_kittentts raises."""
        import sys
        # A ``None`` entry in sys.modules makes ``import kittentts`` fail.
        monkeypatch.setitem(sys.modules, "kittentts", None)
        from tools.tts_tool import _generate_kittentts

        wav_path = str(tmp_path / "out.wav")
        with pytest.raises((ImportError, TypeError)):
            _generate_kittentts("Hi", wav_path, {})
|
||||
|
||||
|
||||
class TestCheckKittenttsAvailable:
    """``_check_kittentts_available`` with ``importlib.util.find_spec`` stubbed."""

    def test_reports_available_when_package_present(self, monkeypatch):
        import importlib.util
        from tools.tts_tool import _check_kittentts_available

        spec_stub = MagicMock()

        def _find_spec(name):
            # Only "kittentts" resolves; every other name looks missing.
            if name == "kittentts":
                return spec_stub
            return None

        monkeypatch.setattr(importlib.util, "find_spec", _find_spec)
        assert _check_kittentts_available() is True

    def test_reports_unavailable_when_package_missing(self, monkeypatch):
        import importlib.util
        from tools.tts_tool import _check_kittentts_available

        def _find_nothing(name):
            return None

        monkeypatch.setattr(importlib.util, "find_spec", _find_nothing)
        assert _check_kittentts_available() is False
|
||||
|
||||
|
||||
class TestDispatcherBranch:
    """``text_to_speech_tool`` behaviour when the configured provider is kittentts."""

    def test_kittentts_not_installed_returns_helpful_error(self, monkeypatch, tmp_path):
        """When provider=kittentts but package missing, return JSON error with setup hint."""
        import sys
        # A ``None`` entry in sys.modules makes ``import kittentts`` fail.
        monkeypatch.setitem(sys.modules, "kittentts", None)
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))

        from tools.tts_tool import text_to_speech_tool

        # Select the kittentts provider through a config file in HERMES_HOME.
        import yaml
        config_text = yaml.safe_dump({"tts": {"provider": "kittentts"}})
        (tmp_path / "config.yaml").write_text(config_text)

        reply = json.loads(text_to_speech_tool(text="Hello"))
        assert reply["success"] is False
        error_text = reply["error"].lower()
        assert "kittentts" in error_text
        assert "hermes setup tts" in error_text
|
||||
|
|
@ -933,6 +933,58 @@ class TestEnableVoiceModeReal:
|
|||
assert cli._voice_mode is True
|
||||
|
||||
|
||||
class TestVoiceBeepConfigReal:
    """Tests the CLI voice beep toggle."""

    @patch("hermes_cli.config.load_config", return_value={"voice": {}})
    def test_beeps_enabled_by_default(self, _cfg):
        # No ``beep_enabled`` key in the voice config: beeps stay on.
        voice_cli = _make_voice_cli()
        beeps_on = voice_cli._voice_beeps_enabled()
        assert beeps_on is True

    @patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
    def test_beeps_can_be_disabled(self, _cfg):
        voice_cli = _make_voice_cli()
        beeps_on = voice_cli._voice_beeps_enabled()
        assert beeps_on is False

    @patch("cli._cprint")
    @patch("cli.threading.Thread")
    @patch("tools.voice_mode.play_beep")
    @patch("tools.voice_mode.create_audio_recorder")
    @patch(
        "tools.voice_mode.check_voice_requirements",
        return_value={
            "available": True,
            "audio_available": True,
            "stt_available": True,
            "details": "OK",
            "missing_packages": [],
        },
    )
    @patch(
        "hermes_cli.config.load_config",
        return_value={
            "voice": {
                "beep_enabled": False,
                "silence_threshold": 200,
                "silence_duration": 3.0,
            }
        },
    )
    def test_start_recording_skips_beep_when_disabled(
        self, _cfg, _req, mock_create, mock_beep, mock_thread, _cp
    ):
        # Mock arguments arrive bottom-up relative to the decorator stack.
        fake_recorder = MagicMock()
        fake_recorder.supports_silence_autostop = True
        mock_create.return_value = fake_recorder
        mock_thread.return_value = MagicMock(start=MagicMock())

        voice_cli = _make_voice_cli()
        voice_cli._voice_start_recording()

        # Recording still starts; only the beep is suppressed.
        fake_recorder.start.assert_called_once()
        mock_beep.assert_not_called()
|
||||
|
||||
|
||||
class TestDisableVoiceModeReal:
|
||||
"""Tests _disable_voice_mode with real CLI instance."""
|
||||
|
||||
|
|
@ -1087,6 +1139,16 @@ class TestVoiceStopAndTranscribeReal:
|
|||
cli._voice_stop_and_transcribe()
|
||||
assert cli._pending_input.empty()
|
||||
|
||||
@patch("cli._cprint")
@patch("hermes_cli.config.load_config", return_value={"voice": {"beep_enabled": False}})
@patch("tools.voice_mode.play_beep")
def test_no_speech_detected_skips_beep_when_disabled(self, mock_beep, _cfg, _cp):
    """With beeps disabled, stopping a recording whose ``stop()`` returns
    ``None`` must not play a beep."""
    silent_recorder = MagicMock()
    silent_recorder.stop.return_value = None
    voice_cli = _make_voice_cli(
        _voice_recording=True, _voice_recorder=silent_recorder
    )

    voice_cli._voice_stop_and_transcribe()

    mock_beep.assert_not_called()
|
||||
|
||||
@patch("cli._cprint")
|
||||
@patch("cli.os.unlink")
|
||||
@patch("cli.os.path.isfile", return_value=True)
|
||||
|
|
|
|||
|
|
@ -44,16 +44,59 @@ def _get_allowed() -> set[str]:
|
|||
_config_passthrough: frozenset[str] | None = None
|
||||
|
||||
|
||||
def _is_hermes_provider_credential(name: str) -> bool:
    """True if ``name`` is a Hermes-managed provider credential (API key,
    token, or similar) per ``_HERMES_PROVIDER_ENV_BLOCKLIST``.

    Skill-declared ``required_environment_variables`` frontmatter must
    not be able to override this list — that was the bypass in
    GHSA-rhgp-j443-p4rf where a malicious skill registered
    ``ANTHROPIC_TOKEN`` / ``OPENAI_API_KEY`` as passthrough and received
    the credential in the ``execute_code`` child process, defeating the
    sandbox's scrubbing guarantee.

    Non-Hermes API keys (TENOR_API_KEY, NOTION_TOKEN, etc.) are NOT
    in the blocklist and remain legitimately registerable — skills that
    wrap third-party APIs still work.

    Args:
        name: Environment variable name, compared exactly as given.

    Returns:
        ``True`` when ``name`` is in the blocklist. ``False`` otherwise,
        including when the blocklist cannot be imported (see below).
    """
    try:
        from tools.environments.local import _HERMES_PROVIDER_ENV_BLOCKLIST
    except Exception:
        # The check fails open here so that a broken import does not block
        # every registration. Make that visible instead of silent: while the
        # blocklist is unavailable the credential guard is not enforced.
        # NOTE(review): consider failing closed if that trade-off changes.
        logger.warning(
            "env passthrough: could not load _HERMES_PROVIDER_ENV_BLOCKLIST; "
            "provider-credential check skipped for %r",
            name,
            exc_info=True,
        )
        return False
    return name in _HERMES_PROVIDER_ENV_BLOCKLIST
|
||||
|
||||
|
||||
def register_env_passthrough(var_names: Iterable[str]) -> None:
    """Register environment variable names as allowed in sandboxed environments.

    Typically called when a skill declares ``required_environment_variables``.

    Variables that are Hermes-managed provider credentials (from
    ``_HERMES_PROVIDER_ENV_BLOCKLIST``) are rejected here to preserve
    the ``execute_code`` sandbox's credential-scrubbing guarantee per
    GHSA-rhgp-j443-p4rf. A skill that needs to talk to a Hermes-managed
    provider should do so via the agent's main-process tools (web_search,
    web_extract, etc.) where the credential remains safely in the main
    process.

    Non-Hermes third-party API keys (TENOR_API_KEY, NOTION_TOKEN, etc.)
    pass through normally — they were never in the sandbox scrub list.

    Args:
        var_names: Names to register. Surrounding whitespace is stripped and
            blank names are ignored. A single bare string is treated as one
            name, not as an iterable of characters. Non-string entries are
            skipped.
    """
    # Iterating a bare string would register each character as a variable.
    if isinstance(var_names, str):
        var_names = (var_names,)

    for name in var_names:
        if not isinstance(name, str):
            # Malformed frontmatter (e.g. a null or numeric list entry) must
            # not abort registration of the remaining names.
            logger.debug("env passthrough: ignoring non-string entry %r", name)
            continue
        name = name.strip()
        if not name:
            continue
        if _is_hermes_provider_credential(name):
            logger.warning(
                "env passthrough: refusing to register Hermes provider "
                "credential %r (blocked by _HERMES_PROVIDER_ENV_BLOCKLIST). "
                "Skills must not override the execute_code sandbox's "
                "credential scrubbing; see GHSA-rhgp-j443-p4rf.",
                name,
            )
            continue
        _get_allowed().add(name)
        logger.debug("env passthrough: registered %s", name)
|
||||
|
||||
|
||||
def _load_config_passthrough() -> frozenset[str]:
|
||||
|
|
|
|||
|
|
@ -213,6 +213,77 @@ def _make_run_env(env: dict) -> dict:
|
|||
return run_env
|
||||
|
||||
|
||||
def _read_terminal_shell_init_config() -> tuple[list[str], bool]:
|
||||
"""Return (shell_init_files, auto_source_bashrc) from config.yaml.
|
||||
|
||||
Best-effort — returns sensible defaults on any failure so terminal
|
||||
execution never breaks because the config file is unreadable.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
|
||||
cfg = load_config() or {}
|
||||
terminal_cfg = cfg.get("terminal") or {}
|
||||
files = terminal_cfg.get("shell_init_files") or []
|
||||
if not isinstance(files, list):
|
||||
files = []
|
||||
auto_bashrc = bool(terminal_cfg.get("auto_source_bashrc", True))
|
||||
return [str(f) for f in files if f], auto_bashrc
|
||||
except Exception:
|
||||
return [], True
|
||||
|
||||
|
||||
def _resolve_shell_init_files() -> list[str]:
    """Resolve the list of files to source before the login-shell snapshot.

    Expands ``~`` and ``${VAR}`` references and drops anything that doesn't
    exist on disk, so a missing ``~/.bashrc`` never breaks the snapshot.
    The ``auto_source_bashrc`` path runs only when the user hasn't supplied
    an explicit list — once they have, Hermes trusts them.

    Relative entries are anchored to this process's current working
    directory. The returned paths are therefore always absolute, and name
    the same file that was checked here whatever directory the shell later
    runs in.

    Returns:
        Absolute paths of the existing files, in configuration order.
    """
    explicit, auto_bashrc = _read_terminal_shell_init_config()

    candidates: list[str] = []
    if explicit:
        candidates.extend(explicit)
    elif auto_bashrc and not _IS_WINDOWS:
        # Bash's login-shell invocation does NOT source ~/.bashrc by default,
        # so tools like nvm / asdf / pyenv that self-install there stay
        # invisible to the snapshot without this nudge.
        candidates.append("~/.bashrc")

    resolved: list[str] = []
    for raw in candidates:
        try:
            path = os.path.expandvars(os.path.expanduser(raw))
        except Exception:
            continue
        if not path:
            continue
        if not os.path.isabs(path):
            # Plain join instead of abspath(): no lexical ".." collapsing,
            # which could change the target when symlinks are involved.
            path = os.path.join(os.getcwd(), path)
        if os.path.isfile(path):
            resolved.append(path)
    return resolved
|
||||
|
||||
|
||||
def _prepend_shell_init(cmd_string: str, files: list[str]) -> str:
|
||||
"""Prepend ``source <file>`` lines (guarded + silent) to a bash script.
|
||||
|
||||
Each file is wrapped so a failing rc file doesn't abort the whole
|
||||
bootstrap: ``set +e`` keeps going on errors, ``2>/dev/null`` hides
|
||||
noisy prompts, and ``|| true`` neutralises the exit status.
|
||||
"""
|
||||
if not files:
|
||||
return cmd_string
|
||||
|
||||
prelude_parts = ["set +e"]
|
||||
for path in files:
|
||||
# shlex.quote isn't available here without an import; the files list
|
||||
# comes from os.path.expanduser output so it's a concrete absolute
|
||||
# path. Escape single quotes defensively anyway.
|
||||
safe = path.replace("'", "'\\''")
|
||||
prelude_parts.append(f"[ -r '{safe}' ] && . '{safe}' 2>/dev/null || true")
|
||||
prelude = "\n".join(prelude_parts) + "\n"
|
||||
return prelude + cmd_string
|
||||
|
||||
|
||||
class LocalEnvironment(BaseEnvironment):
|
||||
"""Run commands directly on the host machine.
|
||||
|
||||
|
|
@ -255,6 +326,16 @@ class LocalEnvironment(BaseEnvironment):
|
|||
timeout: int = 120,
|
||||
stdin_data: str | None = None) -> subprocess.Popen:
|
||||
bash = _find_bash()
|
||||
# For login-shell invocations (used by init_session to build the
|
||||
# environment snapshot), prepend sources for the user's bashrc /
|
||||
# custom init files so tools registered outside bash_profile
|
||||
# (nvm, asdf, pyenv, …) end up on PATH in the captured snapshot.
|
||||
# Non-login invocations are already sourcing the snapshot and
|
||||
# don't need this.
|
||||
if login:
|
||||
init_files = _resolve_shell_init_files()
|
||||
if init_files:
|
||||
cmd_string = _prepend_shell_init(cmd_string, init_files)
|
||||
args = [bash, "-l", "-c", cmd_string] if login else [bash, "-c", cmd_string]
|
||||
run_env = _make_run_env(self.env)
|
||||
|
||||
|
|
|
|||
|
|
@ -35,6 +35,13 @@ from pathlib import Path
|
|||
from hermes_constants import get_hermes_home
|
||||
from tools.binary_extensions import BINARY_EXTENSIONS
|
||||
|
||||
from agent.file_safety import (
|
||||
build_write_denied_paths,
|
||||
build_write_denied_prefixes,
|
||||
get_safe_write_root as _shared_get_safe_write_root,
|
||||
is_write_denied as _shared_is_write_denied,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Write-path deny list — blocks writes to sensitive system/credential files
|
||||
|
|
@ -42,41 +49,9 @@ from tools.binary_extensions import BINARY_EXTENSIONS
|
|||
|
||||
_HOME = str(Path.home())
|
||||
|
||||
WRITE_DENIED_PATHS = {
|
||||
os.path.realpath(p) for p in [
|
||||
os.path.join(_HOME, ".ssh", "authorized_keys"),
|
||||
os.path.join(_HOME, ".ssh", "id_rsa"),
|
||||
os.path.join(_HOME, ".ssh", "id_ed25519"),
|
||||
os.path.join(_HOME, ".ssh", "config"),
|
||||
str(get_hermes_home() / ".env"),
|
||||
os.path.join(_HOME, ".bashrc"),
|
||||
os.path.join(_HOME, ".zshrc"),
|
||||
os.path.join(_HOME, ".profile"),
|
||||
os.path.join(_HOME, ".bash_profile"),
|
||||
os.path.join(_HOME, ".zprofile"),
|
||||
os.path.join(_HOME, ".netrc"),
|
||||
os.path.join(_HOME, ".pgpass"),
|
||||
os.path.join(_HOME, ".npmrc"),
|
||||
os.path.join(_HOME, ".pypirc"),
|
||||
"/etc/sudoers",
|
||||
"/etc/passwd",
|
||||
"/etc/shadow",
|
||||
]
|
||||
}
|
||||
WRITE_DENIED_PATHS = build_write_denied_paths(_HOME)
|
||||
|
||||
WRITE_DENIED_PREFIXES = [
|
||||
os.path.realpath(p) + os.sep for p in [
|
||||
os.path.join(_HOME, ".ssh"),
|
||||
os.path.join(_HOME, ".aws"),
|
||||
os.path.join(_HOME, ".gnupg"),
|
||||
os.path.join(_HOME, ".kube"),
|
||||
"/etc/sudoers.d",
|
||||
"/etc/systemd",
|
||||
os.path.join(_HOME, ".docker"),
|
||||
os.path.join(_HOME, ".azure"),
|
||||
os.path.join(_HOME, ".config", "gh"),
|
||||
]
|
||||
]
|
||||
WRITE_DENIED_PREFIXES = build_write_denied_prefixes(_HOME)
|
||||
|
||||
|
||||
def _get_safe_write_root() -> Optional[str]:
|
||||
|
|
@ -87,33 +62,12 @@ def _get_safe_write_root() -> Optional[str]:
|
|||
not on the static deny list. Opt-in hardening for gateway/messaging
|
||||
deployments that should only touch a workspace checkout.
|
||||
"""
|
||||
root = os.getenv("HERMES_WRITE_SAFE_ROOT", "")
|
||||
if not root:
|
||||
return None
|
||||
try:
|
||||
return os.path.realpath(os.path.expanduser(root))
|
||||
except Exception:
|
||||
return None
|
||||
return _shared_get_safe_write_root()
|
||||
|
||||
|
||||
def _is_write_denied(path: str) -> bool:
|
||||
"""Return True if path is on the write deny list."""
|
||||
resolved = os.path.realpath(os.path.expanduser(str(path)))
|
||||
|
||||
# 1) Static deny list
|
||||
if resolved in WRITE_DENIED_PATHS:
|
||||
return True
|
||||
for prefix in WRITE_DENIED_PREFIXES:
|
||||
if resolved.startswith(prefix):
|
||||
return True
|
||||
|
||||
# 2) Optional safe-root sandbox
|
||||
safe_root = _get_safe_write_root()
|
||||
if safe_root:
|
||||
if not (resolved == safe_root or resolved.startswith(safe_root + os.sep)):
|
||||
return True
|
||||
|
||||
return False
|
||||
return _shared_is_write_denied(path)
|
||||
|
||||
|
||||
# =============================================================================
|
||||
|
|
@ -784,12 +738,14 @@ class ShellFileOperations(FileOperations):
|
|||
content, old_string, new_string, replace_all
|
||||
)
|
||||
|
||||
if error:
|
||||
return PatchResult(error=error)
|
||||
|
||||
if match_count == 0:
|
||||
return PatchResult(error=f"Could not find match for old_string in {path}")
|
||||
|
||||
if error or match_count == 0:
|
||||
err_msg = error or f"Could not find match for old_string in {path}"
|
||||
try:
|
||||
from tools.fuzzy_match import format_no_match_hint
|
||||
err_msg += format_no_match_hint(err_msg, match_count, old_string, content)
|
||||
except Exception:
|
||||
pass
|
||||
return PatchResult(error=err_msg)
|
||||
# Write back
|
||||
write_result = self.write_file(path, new_content)
|
||||
if write_result.error:
|
||||
|
|
|
|||
|
|
@ -7,6 +7,9 @@ import logging
|
|||
import os
|
||||
import threading
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
from agent.file_safety import get_read_block_error
|
||||
from tools.binary_extensions import has_binary_extension
|
||||
from tools.file_operations import ShellFileOperations
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
|
@ -373,24 +376,9 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
|||
|
||||
# ── Hermes internal path guard ────────────────────────────────
|
||||
# Prevent prompt injection via catalog or hub metadata files.
|
||||
from hermes_constants import get_hermes_home as _get_hh
|
||||
_hermes_home = _get_hh().resolve()
|
||||
_blocked_dirs = [
|
||||
_hermes_home / "skills" / ".hub" / "index-cache",
|
||||
_hermes_home / "skills" / ".hub",
|
||||
]
|
||||
for _blocked in _blocked_dirs:
|
||||
try:
|
||||
_resolved.relative_to(_blocked)
|
||||
return json.dumps({
|
||||
"error": (
|
||||
f"Access denied: {path} is an internal Hermes cache file "
|
||||
"and cannot be read directly to prevent prompt injection. "
|
||||
"Use the skills_list or skill_view tools instead."
|
||||
)
|
||||
})
|
||||
except ValueError:
|
||||
pass
|
||||
block_error = get_read_block_error(path)
|
||||
if block_error:
|
||||
return json.dumps({"error": block_error})
|
||||
|
||||
# ── Dedup check ───────────────────────────────────────────────
|
||||
# If we already read this exact (path, offset, limit) and the
|
||||
|
|
@ -682,8 +670,11 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
|||
result_json = json.dumps(result_dict, ensure_ascii=False)
|
||||
# Hint when old_string not found — saves iterations where the agent
|
||||
# retries with stale content instead of re-reading the file.
|
||||
# Suppressed when patch_replace already attached a rich "Did you mean?"
|
||||
# snippet (which is strictly more useful than the generic hint).
|
||||
if result_dict.get("error") and "Could not find" in str(result_dict["error"]):
|
||||
result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]"
|
||||
if "Did you mean one of these sections?" not in str(result_dict["error"]):
|
||||
result_json += "\n\n[Hint: old_string not found. Use read_file to verify the current content, or search_files to locate the text.]"
|
||||
return result_json
|
||||
except Exception as e:
|
||||
return tool_error(str(e))
|
||||
|
|
|
|||
|
|
@ -619,3 +619,86 @@ def _map_normalized_positions(original: str, normalized: str,
|
|||
original_matches.append((orig_start, min(orig_end, len(original))))
|
||||
|
||||
return original_matches
|
||||
|
||||
|
||||
def find_closest_lines(old_string: str, content: str, context_lines: int = 2, max_results: int = 3) -> str:
    """Find lines in content most similar to old_string for "did you mean?" feedback.

    The first non-blank line of ``old_string`` (whitespace-stripped) is the
    anchor.  It is compared with every non-blank, stripped line of
    ``content`` using ``SequenceMatcher``; lines whose ratio exceeds 0.3 are
    candidates.  For the best ``max_results`` candidates a numbered snippet
    is produced, positioned as if ``old_string`` were laid over ``content``
    with its anchor line on the matched line.

    Args:
        old_string: The text that failed to match.
        content: The text that was searched.
        context_lines: Extra lines shown above and below each window.
        max_results: Maximum number of candidate lines turned into snippets.

    Returns:
        Snippets whose rows look like ``"  12| <line>"`` (1-based line
        numbers), separated by a ``---`` line, or an empty string when there
        is nothing useful to show.
    """
    if not old_string or not content:
        return ""

    old_lines = old_string.splitlines()
    content_lines = content.splitlines()

    if not old_lines or not content_lines:
        return ""

    # Anchor on the first non-blank line of old_string and remember its
    # position.  Previously the window was always computed as if the anchor
    # were line 0, so an old_string with leading blank lines produced a
    # snippet shifted down by that many lines.
    anchor = ""
    anchor_offset = 0
    for offset, old_line in enumerate(old_lines):
        candidate = old_line.strip()
        if candidate:
            anchor, anchor_offset = candidate, offset
            break
    if not anchor:
        return ""

    # Score each non-blank line in content by similarity to the anchor.
    scored = []
    for i, line in enumerate(content_lines):
        stripped = line.strip()
        if not stripped:
            continue
        ratio = SequenceMatcher(None, anchor, stripped).ratio()
        if ratio > 0.3:
            scored.append((ratio, i))

    if not scored:
        return ""

    # Best matches first; the sort is stable, so ties keep file order.
    scored.sort(key=lambda item: -item[0])

    parts = []
    seen_ranges = set()
    total = len(content_lines)
    for _, line_idx in scored[:max_results]:
        # Line of content on which old_string's first line would fall.
        window_top = line_idx - anchor_offset
        start = max(0, window_top - context_lines)
        end = min(total, window_top + len(old_lines) + context_lines)
        key = (start, end)
        if key in seen_ranges:
            # Clamping can give several candidates the exact same window.
            continue
        seen_ranges.add(key)
        parts.append(
            "\n".join(
                f"{n + 1:4d}| {content_lines[n]}" for n in range(start, end)
            )
        )

    return "\n---\n".join(parts)
|
||||
|
||||
|
||||
def format_no_match_hint(error: Optional[str], match_count: int,
                         old_string: str, content: str) -> str:
    """Build the optional "Did you mean ...?" suffix for a failed replace.

    A suffix is produced only for a plain "old_string not found" failure,
    i.e. when ``match_count`` is 0 *and* ``error`` starts with
    ``"Could not find"``.  Other failures can also report zero matches, but
    a similarity hint would be misleading for them, so they get nothing.

    Args:
        error: The error message of the failed replace, if any.
        match_count: Number of matches the replace reported.
        old_string: The text that was searched for.
        content: The text that was searched.

    Returns:
        Two newlines, a heading line and the snippets from
        ``find_closest_lines``; or an empty string when the failure is not a
        plain miss or no similar lines were found.
    """
    plain_miss = (
        match_count == 0
        and bool(error)
        and error.startswith("Could not find")
    )
    if not plain_miss:
        return ""

    suggestion = find_closest_lines(old_string, content)
    if suggestion:
        return "\n\nDid you mean one of these sections?\n" + suggestion
    return ""
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Add table
Add a link
Reference in a new issue