Merge remote-tracking branch 'origin/main' into fix/bundle-size

This commit is contained in:
ethernet 2026-05-11 16:01:00 -04:00
commit 3197b4de6d
1437 changed files with 219762 additions and 11968 deletions

View file

@ -0,0 +1,19 @@
"""Fixtures shared across hermes_cli kanban tests."""
from __future__ import annotations
import pytest
@pytest.fixture
def all_assignees_spawnable(monkeypatch):
    """Make every assignee look like it maps to a real Hermes profile.

    Dispatcher tests mostly use synthetic assignees ("alice", "bob") that
    have no profile directory on disk. As noted by the original authors, the
    dispatcher's profile-exists guard (PR #20105) would otherwise route those
    tasks into ``skipped_nonspawnable`` instead of spawning them, which would
    break tests that assert spawn behavior.
    """
    from hermes_cli import profiles as profiles_module

    def _always_exists(name):
        # Every profile name is reported as present, regardless of disk state.
        return True

    monkeypatch.setattr(profiles_module, "profile_exists", _always_exists)

View file

@ -0,0 +1,141 @@
"""Regression tests for _apply_profile_override HERMES_HOME guard (issue #22502).
When HERMES_HOME is set to the hermes root (e.g. systemd hardcodes
HERMES_HOME=/root/.hermes), _apply_profile_override must still read
active_profile and update HERMES_HOME to the profile directory.
When HERMES_HOME is already a profile directory (.../profiles/<name>),
_apply_profile_override must trust it and return without re-reading
active_profile (child-process inheritance contract).
"""
from __future__ import annotations
import os
import sys
from pathlib import Path
import pytest
def _run_apply_profile_override(
    tmp_path,
    monkeypatch,
    *,
    hermes_home: str | None,
    active_profile: str | None,
    argv: list[str] | None = None,
):
    """Invoke ``_apply_profile_override`` against a throwaway hermes root.

    Builds ``tmp_path/.hermes``, optionally writes its ``active_profile``
    file (and the matching ``profiles/<name>`` directory for non-default
    profiles), points ``Path.home`` at ``tmp_path``, sets or clears
    ``HERMES_HOME``, and replaces ``sys.argv`` before making the call.

    Returns:
        The value of ``os.environ["HERMES_HOME"]`` after the call, or
        ``None`` when the variable is unset.
    """
    root = tmp_path / ".hermes"
    root.mkdir(parents=True, exist_ok=True)

    if active_profile is not None:
        (root / "active_profile").write_text(active_profile)
        # Only a named, non-default profile gets a directory of its own.
        if active_profile and active_profile != "default":
            (root / "profiles" / active_profile).mkdir(parents=True, exist_ok=True)

    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    if hermes_home is None:
        monkeypatch.delenv("HERMES_HOME", raising=False)
    else:
        monkeypatch.setenv("HERMES_HOME", hermes_home)

    cli_args = argv or ["hermes", "gateway", "start"]
    monkeypatch.setattr(sys, "argv", cli_args)

    from hermes_cli.main import _apply_profile_override

    _apply_profile_override()
    return os.environ.get("HERMES_HOME")
class TestApplyProfileOverrideHermesHomeGuard:
    """Regression coverage for issue #22502.

    A HERMES_HOME that points at the hermes root must NOT suppress the
    active_profile lookup, whereas a HERMES_HOME that already points at a
    profile directory IS accepted unchanged.
    """

    def test_hermes_home_at_root_with_active_profile_is_redirected(
        self, tmp_path, monkeypatch
    ):
        """Root-level HERMES_HOME plus active_profile=coder must end up at
        .../profiles/coder.

        Scenario from #22502: systemd pins HERMES_HOME to the hermes root and
        the user later picks a profile with `hermes profile use`. Before the
        fix the guard returned early, so active_profile was ignored.
        """
        root_dir = tmp_path / ".hermes"
        root_dir.mkdir(parents=True, exist_ok=True)

        result = _run_apply_profile_override(
            tmp_path,
            monkeypatch,
            hermes_home=str(root_dir),
            active_profile="coder",
        )

        assert result is not None, "HERMES_HOME must be set after profile redirect"
        assert "profiles" in result, (
            f"Expected HERMES_HOME to point into profiles/ dir, got: {result!r}"
        )
        assert result.endswith("coder"), (
            f"Expected HERMES_HOME to end with 'coder', got: {result!r}"
        )

    def test_hermes_home_already_profile_dir_is_trusted(self, tmp_path, monkeypatch):
        """A HERMES_HOME of .../profiles/coder stays put even though
        active_profile names a different profile.

        This keeps the child-process inheritance contract: a subprocess
        launched with HERMES_HOME already set to a specific profile must
        remain in that profile.
        """
        root_dir = tmp_path / ".hermes"
        coder_dir = root_dir / "profiles" / "coder"
        coder_dir.mkdir(parents=True, exist_ok=True)
        # active_profile deliberately disagrees with HERMES_HOME.
        (root_dir / "active_profile").write_text("other")

        expected_home = str(coder_dir)
        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        monkeypatch.setenv("HERMES_HOME", expected_home)
        monkeypatch.setattr(sys, "argv", ["hermes", "gateway", "start"])

        from hermes_cli.main import _apply_profile_override

        _apply_profile_override()

        assert os.environ.get("HERMES_HOME") == expected_home, (
            "HERMES_HOME must remain unchanged when already pointing to a profile dir"
        )

    def test_hermes_home_unset_reads_active_profile(self, tmp_path, monkeypatch):
        """Classic case: with HERMES_HOME unset and active_profile=coder,
        HERMES_HOME is set to the profile directory (existing behaviour must
        not regress).
        """
        final_home = _run_apply_profile_override(
            tmp_path,
            monkeypatch,
            hermes_home=None,
            active_profile="coder",
        )

        assert final_home is not None
        assert "coder" in final_home

    def test_hermes_home_unset_default_profile_no_redirect(self, tmp_path, monkeypatch):
        """An active_profile of "default" must leave HERMES_HOME unset."""
        root_dir = tmp_path / ".hermes"
        root_dir.mkdir(parents=True, exist_ok=True)
        (root_dir / "active_profile").write_text("default")

        monkeypatch.setattr(Path, "home", lambda: tmp_path)
        monkeypatch.delenv("HERMES_HOME", raising=False)
        monkeypatch.setattr(sys, "argv", ["hermes", "gateway", "start"])

        from hermes_cli.main import _apply_profile_override

        _apply_profile_override()

        assert os.environ.get("HERMES_HOME") is None

View file

@ -5,8 +5,10 @@ from __future__ import annotations
import base64
import json
from datetime import datetime, timezone
from unittest.mock import patch
import pytest
import yaml
def _write_auth_store(tmp_path, payload: dict) -> None:
@ -589,6 +591,39 @@ def test_logout_clears_stale_active_codex_without_provider_credentials(tmp_path,
assert "provider: auto" in config_text
def test_reset_config_provider_uses_atomic_yaml_write(tmp_path, monkeypatch):
    """Logout config reset must hand the YAML write to ``atomic_yaml_write``.

    The patched writer checks the arguments it receives and then fails, so
    the test can also confirm that config.yaml on disk is left untouched.
    """
    home_dir = tmp_path / "hermes"
    home_dir.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    config_path = home_dir / "config.yaml"
    seed_config = {
        "model": {
            "default": "gpt-5.3-codex",
            "provider": "openai-codex",
            "base_url": "https://chatgpt.com/backend-api/codex",
        }
    }
    seed_yaml = yaml.safe_dump(seed_config, sort_keys=False)
    config_path.write_text(seed_yaml, encoding="utf-8")
    text_before = config_path.read_text(encoding="utf-8")

    from hermes_cli.auth import _reset_config_provider

    def _failing_writer(path, data, **kwargs):
        # Validate what the reset would have written, then simulate failure.
        assert path == config_path
        model_section = data["model"]
        assert model_section["provider"] == "auto"
        assert model_section["base_url"] == "https://openrouter.ai/api/v1"
        assert kwargs["sort_keys"] is False
        raise OSError("simulated atomic write failure")

    with patch(
        "hermes_cli.auth.atomic_yaml_write", side_effect=_failing_writer
    ) as mock_write, pytest.raises(OSError, match="simulated atomic write failure"):
        _reset_config_provider()

    assert mock_write.call_count == 1
    assert config_path.read_text(encoding="utf-8") == text_before
def test_auth_list_does_not_call_mutating_select(monkeypatch, capsys):
from hermes_cli.auth_commands import auth_list_command

View file

@ -1,7 +1,6 @@
"""Regression tests for Nous OAuth refresh + agent-key mint interactions."""
import json
import os
from datetime import datetime, timezone
from pathlib import Path
@ -76,6 +75,20 @@ class TestResolveVerifyFallback:
)
assert result is False
def test_string_false_in_auth_state_does_not_disable_tls_verify(self):
    """The string "false" for ``tls.insecure`` must not switch TLS verify off."""
    import ssl

    from hermes_cli.auth import _resolve_verify

    auth_state = {"tls": {"insecure": "false"}}
    verify = _resolve_verify(auth_state=auth_state)

    assert verify is not False
    # Accepted outcomes: plain True, or an SSLContext object.
    assert verify is True or isinstance(verify, ssl.SSLContext)
def test_string_true_in_auth_state_disables_tls_verify(self):
    """The string "true" for ``tls.insecure`` must make verify resolve to False."""
    from hermes_cli.auth import _resolve_verify

    auth_state = {"tls": {"insecure": "true"}}
    verify = _resolve_verify(auth_state=auth_state)

    assert verify is False
def test_no_ca_bundle_returns_true(self, monkeypatch):
from hermes_cli.auth import _resolve_verify
@ -848,6 +861,46 @@ def test_refresh_token_reuse_detection_surfaces_actionable_message():
assert exc_info.value.relogin_required is True
def test_refresh_token_exchange_sends_refresh_token_header():
    """The Nous refresh token must travel in a request header.

    Per the original rationale, this lets sandbox proxies substitute
    placeholder credentials without having to parse form bodies.
    """
    from hermes_cli.auth import _refresh_access_token

    class _FakeResponse:
        """Canned successful token response."""

        status_code = 200

        def json(self):
            return {"access_token": "access-2", "refresh_token": "refresh-2"}

    class _FakeClient:
        """Records the keyword arguments of the last ``post`` call."""

        def __init__(self):
            self.kwargs = None

        def post(self, *_args, **kwargs):
            # Positional arguments are irrelevant to this test.
            self.kwargs = kwargs
            return _FakeResponse()

    fake_client = _FakeClient()
    payload = _refresh_access_token(
        client=fake_client,
        portal_base_url="https://portal.nousresearch.com",
        client_id="hermes-cli",
        refresh_token="refresh-1",
    )

    assert payload["access_token"] == "access-2"
    assert payload["refresh_token"] == "refresh-2"

    sent = fake_client.kwargs
    assert sent is not None
    assert sent["headers"]["x-nous-refresh-token"] == "refresh-1"
    # The form body carries only the grant type and client id.
    assert sent["data"] == {
        "grant_type": "refresh_token",
        "client_id": "hermes-cli",
    }
def test_refresh_non_reuse_error_keeps_original_description():
"""Non-reuse invalid_grant errors must keep their original description untouched.
@ -882,3 +935,370 @@ def test_refresh_non_reuse_error_keeps_original_description():
assert "Refresh session has been revoked" in str(exc_info.value)
# Must not have been rewritten with the reuse message.
assert "external process" not in str(exc_info.value).lower()
# =============================================================================
# Shared Nous token store — cross-profile persistence (Codex-style auto-import)
# =============================================================================
@pytest.fixture
def shared_store_env(tmp_path, monkeypatch):
    """Point HERMES_SHARED_AUTH_DIR at a directory under ``tmp_path``.

    Required for every test that exercises the shared Nous store. As the
    original note explains, the seat belt in auth.py refuses to touch the
    real user's shared store under pytest, so a test that forgets this
    fixture fails loudly instead of corrupting real state.

    Returns:
        The redirected shared directory path (``tmp_path / "shared"``).
    """
    redirected_dir = tmp_path / "shared"
    monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(redirected_dir))
    return redirected_dir
def test_shared_store_seat_belt_refuses_real_home_under_pytest(monkeypatch):
    """With no HERMES_SHARED_AUTH_DIR override the seat belt must raise.

    The original note says this mirrors the existing ``_auth_file_path``
    seat belt: forgetting to redirect the store in a test has to fail loudly
    rather than silently write to the user's real ``~/.hermes/shared/``
    across CI runs.
    """
    from hermes_cli import auth as auth_mod

    monkeypatch.delenv("HERMES_SHARED_AUTH_DIR", raising=False)

    with pytest.raises(RuntimeError, match="shared Nous auth store"):
        auth_mod._nous_shared_store_path()
def test_shared_store_honors_env_override(tmp_path, monkeypatch):
    """The store path must live under HERMES_SHARED_AUTH_DIR when it is set."""
    from hermes_cli.auth import NOUS_SHARED_STORE_FILENAME, _nous_shared_store_path

    override_dir = tmp_path / "custom_shared"
    monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(override_dir))

    expected = override_dir / NOUS_SHARED_STORE_FILENAME
    assert _nous_shared_store_path() == expected
def test_shared_store_read_missing_returns_none(shared_store_env):
    """With no store file on disk, ``_read_shared_nous_state()`` yields None."""
    from hermes_cli import auth as auth_mod

    loaded = auth_mod._read_shared_nous_state()
    assert loaded is None
def test_shared_store_read_malformed_returns_none(shared_store_env):
    """A store file that is not valid JSON reads as None, not an exception."""
    from hermes_cli.auth import _nous_shared_store_path, _read_shared_nous_state

    store_file = _nous_shared_store_path()
    store_file.parent.mkdir(parents=True, exist_ok=True)
    store_file.write_text("{ not json")

    loaded = _read_shared_nous_state()
    assert loaded is None
def test_shared_store_read_missing_required_fields_returns_none(shared_store_env):
    """A payload lacking refresh_token reads as None (nothing worth importing)."""
    from hermes_cli.auth import _nous_shared_store_path, _read_shared_nous_state

    incomplete_payload = {"_schema": 1, "access_token": "abc"}

    store_file = _nous_shared_store_path()
    store_file.parent.mkdir(parents=True, exist_ok=True)
    store_file.write_text(json.dumps(incomplete_payload))

    loaded = _read_shared_nous_state()
    assert loaded is None
def test_shared_store_write_and_read_roundtrip(shared_store_env):
    """Writing then reading must keep the refresh_token and the OAuth URLs."""
    from hermes_cli.auth import (
        _nous_shared_store_path,
        _read_shared_nous_state,
        _write_shared_nous_state,
    )

    _write_shared_nous_state(_full_state_fixture())

    store_file = _nous_shared_store_path()
    assert store_file.is_file()

    # Expect 0600 where the platform supports it; 0o644 is tolerated for
    # platforms without chmod.
    permission_bits = store_file.stat().st_mode & 0o777
    assert permission_bits in (0o600, 0o644)

    loaded = _read_shared_nous_state()
    assert loaded is not None

    expected_fields = {
        "refresh_token": "refresh-tok",
        "access_token": "access-tok",
        "portal_base_url": "https://portal.example.com",
        "inference_base_url": "https://inference.example.com/v1",
    }
    for field_name, expected_value in expected_fields.items():
        assert loaded[field_name] == expected_value

    # The volatile agent_key MUST NOT reach the shared store (24h TTL,
    # profile-specific — only long-lived OAuth tokens are useful across
    # profiles).
    assert "agent_key" not in loaded
def test_shared_store_write_skips_when_refresh_token_missing(shared_store_env):
    """An empty refresh_token makes the write a no-op (nothing to share)."""
    from hermes_cli.auth import _nous_shared_store_path, _write_shared_nous_state

    tokenless_state = dict(_full_state_fixture())
    tokenless_state["refresh_token"] = ""

    _write_shared_nous_state(tokenless_state)

    store_file = _nous_shared_store_path()
    assert not store_file.is_file()
def test_persist_nous_credentials_mirrors_to_shared_store(
    tmp_path, monkeypatch, shared_store_env,
):
    """persist_nous_credentials must fill BOTH the per-profile auth.json
    AND the shared store.

    The stated purpose is that a later profile's `hermes auth add nous
    --type oauth` can one-tap import instead of redoing device-code.
    """
    from hermes_cli.auth import (
        _nous_shared_store_path,
        _read_shared_nous_state,
        persist_nous_credentials,
    )

    home_dir = tmp_path / "hermes"
    home_dir.mkdir(parents=True, exist_ok=True)
    auth_file = home_dir / "auth.json"
    auth_file.write_text(json.dumps({"version": 1, "providers": {}}))
    monkeypatch.setenv("HERMES_HOME", str(home_dir))

    persist_nous_credentials(_full_state_fixture())

    # The per-profile auth.json now has a nous provider entry.
    profile_payload = json.loads(auth_file.read_text())
    assert "nous" in profile_payload.get("providers", {})

    # The shared store carries the same refresh_token.
    shared_state = _read_shared_nous_state()
    assert shared_state is not None
    assert shared_state["refresh_token"] == "refresh-tok"

    # The shared file sits under the tmp override, NOT the real home.
    shared_path_text = str(_nous_shared_store_path())
    assert shared_path_text.startswith(str(shared_store_env))
def test_try_import_shared_returns_none_when_store_missing(shared_store_env):
    """Without a shared store there is no rehydrate (fall through to device-code)."""
    from hermes_cli import auth as auth_mod

    imported = auth_mod._try_import_shared_nous_state()
    assert imported is None
def test_try_import_shared_returns_none_on_refresh_failure(
    shared_store_env, monkeypatch,
):
    """A rejected refresh must make _try_import_shared_nous_state return None.

    When the portal refuses the stored refresh_token (revoked, expired,
    portal down), the login flow is expected to fall back to a fresh
    device-code run, so the import helper must not raise.
    """
    from hermes_cli import auth as auth_mod

    # Seed the shared store so there is something to import.
    auth_mod._write_shared_nous_state(_full_state_fixture())

    def _rejecting_refresh(*_args, **_kwargs):
        # Stand-in for a portal that has revoked the session.
        raise AuthError(
            "Refresh session has been revoked",
            provider="nous",
            code="invalid_grant",
            relogin_required=True,
        )

    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _rejecting_refresh)

    imported = auth_mod._try_import_shared_nous_state()
    assert imported is None
def test_try_import_shared_rehydrates_on_success(shared_store_env, monkeypatch):
    """Happy path for importing from the shared store.

    The stored refresh_token is accepted, the forced refresh+mint yields a
    fresh access_token and agent_key, and the returned dict has every field
    persist_nous_credentials() needs.
    """
    from hermes_cli import auth as auth_mod

    auth_mod._write_shared_nous_state(_full_state_fixture())

    def _successful_refresh(state, **kwargs):
        # The import path must force both the refresh and the mint.
        assert kwargs.get("force_refresh") is True
        assert kwargs.get("force_mint") is True
        # Simulate the portal handing back fresh tokens and a new agent_key.
        refreshed = dict(state)
        refreshed.update(
            {
                "access_token": "fresh-access-tok",
                "refresh_token": "fresh-refresh-tok",  # rotated
                "agent_key": "new-agent-key",
                "agent_key_expires_at": "2026-04-19T22:00:00+00:00",
            }
        )
        return refreshed

    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _successful_refresh)

    imported = auth_mod._try_import_shared_nous_state()
    assert imported is not None

    assert imported["access_token"] == "fresh-access-tok"
    assert imported["refresh_token"] == "fresh-refresh-tok"
    assert imported["agent_key"] == "new-agent-key"
    # These values come from the shared state and must be preserved.
    assert imported["portal_base_url"] == "https://portal.example.com"
    assert imported["client_id"] == "hermes-cli"
def test_shared_store_survives_across_profile_switch(
    tmp_path, monkeypatch, shared_store_env,
):
    """End-to-end check of cross-profile sharing.

    Profile A logs in and populates the shared store; profile B (a different
    HERMES_HOME) sees the same shared state and can rehydrate without
    re-running device-code.
    """
    from hermes_cli import auth as auth_mod

    def _new_profile(dirname):
        # Create a profile home with an auth.json that has no providers yet.
        home = tmp_path / dirname
        home.mkdir(parents=True, exist_ok=True)
        (home / "auth.json").write_text(
            json.dumps({"version": 1, "providers": {}})
        )
        return home

    def _providers_of(home):
        return json.loads((home / "auth.json").read_text()).get("providers", {})

    # Profile A logs in, which mirrors its credentials to the shared store.
    home_a = _new_profile("profile_a")
    monkeypatch.setenv("HERMES_HOME", str(home_a))
    auth_mod.persist_nous_credentials(_full_state_fixture())
    assert "nous" in _providers_of(home_a)

    # Profile B starts with a fresh HERMES_HOME and no nous entry of its own.
    home_b = _new_profile("profile_b")
    monkeypatch.setenv("HERMES_HOME", str(home_b))
    assert "nous" not in _providers_of(home_b)

    # The shared store is still visible from profile B.
    shared_before = auth_mod._read_shared_nous_state()
    assert shared_before is not None
    assert shared_before["refresh_token"] == "refresh-tok"

    def _rotating_refresh(state, **kwargs):
        rotated = dict(state)
        rotated.update(
            {
                "access_token": "b-access-tok",
                "refresh_token": "b-refresh-tok",
                "agent_key": "b-agent-key",
                "agent_key_expires_at": "2026-04-19T22:00:00+00:00",
            }
        )
        return rotated

    monkeypatch.setattr(auth_mod, "refresh_nous_oauth_from_state", _rotating_refresh)

    # A successful rehydrate followed by persist lands nous in profile B.
    rehydrated = auth_mod._try_import_shared_nous_state()
    assert rehydrated is not None
    auth_mod.persist_nous_credentials(rehydrated)

    providers_b = _providers_of(home_b)
    assert "nous" in providers_b
    assert providers_b["nous"]["refresh_token"] == "b-refresh-tok"

    # The shared store picked up the rotated refresh_token as well.
    shared_after = auth_mod._read_shared_nous_state()
    assert shared_after is not None
    assert shared_after["refresh_token"] == "b-refresh-tok"
def test_runtime_refresh_uses_newer_shared_token_before_local_stale_token(
    tmp_path, monkeypatch, shared_store_env,
):
    """Runtime resolution must prefer a newer shared token over a stale local one.

    A sibling profile may rotate the single-use Nous refresh token. When this
    profile later wakes with an expired local token, it has to adopt the
    shared token before refreshing; per the original note, submitting the
    stale local refresh token could trigger portal reuse revocation for the
    whole shared session.
    """
    from hermes_cli import auth as auth_mod

    home_b = tmp_path / "profile_b"
    _setup_nous_auth(
        home_b,
        access_token="local-expired-access",
        refresh_token="local-stale-refresh",
    )
    monkeypatch.setenv("HERMES_HOME", str(home_b))

    # Seed the shared store with a fresher, far-future-expiry token pair.
    fresher_state = _full_state_fixture()
    fresher_state["access_token"] = "shared-fresh-access"
    fresher_state["refresh_token"] = "shared-fresh-refresh"
    fresher_state["expires_at"] = "2099-01-01T00:00:00+00:00"
    auth_mod._write_shared_nous_state(fresher_state)

    def _refresh_should_not_happen(**_kwargs):
        raise AssertionError("stale profile-local refresh token was used")

    seen_access_tokens: list[str] = []

    def _recording_mint(*, client, portal_base_url, access_token, min_ttl_seconds):
        # Record which access token the mint call was given.
        seen_access_tokens.append(access_token)
        return _mint_payload(api_key="agent-key-from-shared-token")

    monkeypatch.setattr(auth_mod, "_refresh_access_token", _refresh_should_not_happen)
    monkeypatch.setattr(auth_mod, "_mint_agent_key", _recording_mint)

    credentials = auth_mod.resolve_nous_runtime_credentials(
        min_key_ttl_seconds=300,
        force_mint=True,
    )

    assert credentials["api_key"] == "agent-key-from-shared-token"
    assert seen_access_tokens == ["shared-fresh-access"]

    # The profile's own state must now hold the shared tokens.
    local_state = auth_mod.get_provider_auth_state("nous")
    assert local_state is not None
    assert local_state["refresh_token"] == "shared-fresh-refresh"
    assert local_state["access_token"] == "shared-fresh-access"
def test_managed_gateway_access_token_uses_newer_shared_token(
    tmp_path, monkeypatch, shared_store_env,
):
    """Managed-tool token reads face the same stale-refresh-token hazard."""
    from hermes_cli import auth as auth_mod

    home_b = tmp_path / "profile_b"
    _setup_nous_auth(
        home_b,
        access_token="local-expired-access",
        refresh_token="local-stale-refresh",
    )
    monkeypatch.setenv("HERMES_HOME", str(home_b))

    # Seed the shared store with a fresher, far-future-expiry token pair.
    fresher_state = _full_state_fixture()
    fresher_state["access_token"] = "shared-fresh-access"
    fresher_state["refresh_token"] = "shared-fresh-refresh"
    fresher_state["expires_at"] = "2099-01-01T00:00:00+00:00"
    auth_mod._write_shared_nous_state(fresher_state)

    def _refresh_should_not_happen(**_kwargs):
        raise AssertionError("stale profile-local refresh token was used")

    monkeypatch.setattr(auth_mod, "_refresh_access_token", _refresh_should_not_happen)

    resolved_token = auth_mod.resolve_nous_access_token()
    assert resolved_token == "shared-fresh-access"

    local_state = auth_mod.get_provider_auth_state("nous")
    assert local_state is not None
    assert local_state["refresh_token"] == "shared-fresh-refresh"

View file

@ -0,0 +1,360 @@
"""Tests for cross-profile auth fallback.
When ``HERMES_HOME`` points to a named profile, ``read_credential_pool()``
and ``get_provider_auth_state()`` fall back to the global-root
``auth.json`` per-provider when the profile has no entries for that
provider. Writes still target the profile only.
See the #18594 follow-up report: profile workers couldn't see providers
authenticated only at the global root.
"""
from __future__ import annotations
import json
from pathlib import Path
import pytest
def _make_auth_store(pool: dict | None = None, providers: dict | None = None) -> dict:
store: dict = {"version": 1}
if pool is not None:
store["credential_pool"] = pool
if providers is not None:
store["providers"] = providers
return store
@pytest.fixture()
def profile_env(tmp_path, monkeypatch):
    """Create a global root with an active ``coder`` profile beneath it.

    Layout produced:

    * ``Path.home()`` -> ``tmp_path``
    * Global root -> ``tmp_path/.hermes``
    * Profile -> ``tmp_path/.hermes/profiles/coder`` (HERMES_HOME points here)

    The original note describes this as mirroring the real "named profile
    mounted under the default root" layout that profile users have on disk.

    Returns:
        A dict with the ``"global"`` and ``"profile"`` directory paths.
    """
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    root_dir = tmp_path / ".hermes"
    root_dir.mkdir()

    coder_dir = root_dir / "profiles" / "coder"
    coder_dir.mkdir(parents=True)

    monkeypatch.setenv("HERMES_HOME", str(coder_dir))
    return {"global": root_dir, "profile": coder_dir}
def _write(path: Path, payload: dict) -> None:
path.write_text(json.dumps(payload, indent=2))
# ---------------------------------------------------------------------------
# read_credential_pool — provider-slice reads
# ---------------------------------------------------------------------------
def test_profile_with_zero_entries_falls_back_to_global(profile_env):
    """A profile with an empty pool inherits the global-root entries for a provider."""
    from hermes_cli.auth import read_credential_pool

    global_entry = {
        "id": "glob-1",
        "label": "global-key",
        "auth_type": "api_key",
        "priority": 0,
        "source": "manual",
        "access_token": "sk-or-global",
    }
    _write(
        profile_env["global"] / "auth.json",
        _make_auth_store(pool={"openrouter": [global_entry]}),
    )
    # The profile's auth.json exists but lists no openrouter entries.
    _write(profile_env["profile"] / "auth.json", _make_auth_store(pool={}))

    found = read_credential_pool("openrouter")

    assert len(found) == 1
    only_entry = found[0]
    assert only_entry["id"] == "glob-1"
    assert only_entry["access_token"] == "sk-or-global"
def test_profile_with_entries_fully_shadows_global(profile_env):
    """When the profile has entries for a provider, the global ones are ignored."""
    from hermes_cli.auth import read_credential_pool

    global_entry = {
        "id": "glob-1",
        "label": "global-key",
        "auth_type": "api_key",
        "priority": 0,
        "source": "manual",
        "access_token": "sk-or-global",
    }
    profile_entry = {
        "id": "prof-1",
        "label": "profile-key",
        "auth_type": "api_key",
        "priority": 0,
        "source": "manual",
        "access_token": "sk-or-profile",
    }
    _write(
        profile_env["global"] / "auth.json",
        _make_auth_store(pool={"openrouter": [global_entry]}),
    )
    _write(
        profile_env["profile"] / "auth.json",
        _make_auth_store(pool={"openrouter": [profile_entry]}),
    )

    found = read_credential_pool("openrouter")

    assert len(found) == 1
    only_entry = found[0]
    assert only_entry["id"] == "prof-1"
    assert only_entry["access_token"] == "sk-or-profile"
def test_per_provider_shadowing_is_independent(profile_env):
    """A profile may override one provider and still inherit another from global."""
    from hermes_cli.auth import read_credential_pool

    global_pool = {
        "openrouter": [{
            "id": "glob-or",
            "label": "global-or",
            "auth_type": "api_key",
            "priority": 0,
            "source": "manual",
            "access_token": "sk-or-global",
        }],
        "anthropic": [{
            "id": "glob-ant",
            "label": "global-ant",
            "auth_type": "api_key",
            "priority": 0,
            "source": "manual",
            "access_token": "sk-ant-global",
        }],
    }
    # The profile defines openrouter only, so anthropic should fall back.
    profile_pool = {
        "openrouter": [{
            "id": "prof-or",
            "label": "profile-or",
            "auth_type": "api_key",
            "priority": 0,
            "source": "manual",
            "access_token": "sk-or-profile",
        }],
    }
    _write(profile_env["global"] / "auth.json", _make_auth_store(pool=global_pool))
    _write(profile_env["profile"] / "auth.json", _make_auth_store(pool=profile_pool))

    openrouter_ids = [entry["id"] for entry in read_credential_pool("openrouter")]
    anthropic_ids = [entry["id"] for entry in read_credential_pool("anthropic")]

    assert openrouter_ids == ["prof-or"]
    assert anthropic_ids == ["glob-ant"]
def test_missing_global_auth_file_is_safe(profile_env):
    """Reads still work for a profile when no global auth.json was ever written."""
    from hermes_cli.auth import read_credential_pool

    profile_entry = {
        "id": "prof-1",
        "label": "profile",
        "auth_type": "api_key",
        "priority": 0,
        "source": "manual",
        "access_token": "sk-profile",
    }
    # Deliberately no global auth.json: only the profile file is written.
    _write(
        profile_env["profile"] / "auth.json",
        _make_auth_store(pool={"openrouter": [profile_entry]}),
    )

    first_openrouter = read_credential_pool("openrouter")[0]
    assert first_openrouter["id"] == "prof-1"
    assert read_credential_pool("anthropic") == []
def test_malformed_global_auth_file_does_not_break_profile_read(profile_env):
    """A corrupt global auth.json must not interfere with profile reads."""
    global_auth_file = profile_env["global"] / "auth.json"
    global_auth_file.write_text("{not valid json")

    profile_entry = {
        "id": "prof-1",
        "label": "profile",
        "auth_type": "api_key",
        "priority": 0,
        "source": "manual",
        "access_token": "sk-profile",
    }
    _write(
        profile_env["profile"] / "auth.json",
        _make_auth_store(pool={"openrouter": [profile_entry]}),
    )

    from hermes_cli.auth import read_credential_pool

    # The profile read succeeds; the malformed global file is silently ignored.
    first_openrouter = read_credential_pool("openrouter")[0]
    assert first_openrouter["id"] == "prof-1"
    # Nothing to fall back to for anthropic because global is unreadable.
    assert read_credential_pool("anthropic") == []
# ---------------------------------------------------------------------------
# read_credential_pool — whole-pool reads (provider_id=None)
# ---------------------------------------------------------------------------
def test_whole_pool_merges_global_providers_when_missing_locally(profile_env):
    """A whole-pool read (``None``) fills providers missing locally from global."""
    from hermes_cli.auth import read_credential_pool

    global_pool = {
        "openrouter": [{
            "id": "glob-or",
            "label": "global-or",
            "auth_type": "api_key",
            "priority": 0,
            "source": "manual",
            "access_token": "sk-or-global",
        }],
        "anthropic": [{
            "id": "glob-ant",
            "label": "global-ant",
            "auth_type": "api_key",
            "priority": 0,
            "source": "manual",
            "access_token": "sk-ant-global",
        }],
    }
    profile_pool = {
        "openrouter": [{
            "id": "prof-or",
            "label": "profile-or",
            "auth_type": "api_key",
            "priority": 0,
            "source": "manual",
            "access_token": "sk-or-profile",
        }],
    }
    _write(profile_env["global"] / "auth.json", _make_auth_store(pool=global_pool))
    _write(profile_env["profile"] / "auth.json", _make_auth_store(pool=profile_pool))

    merged = read_credential_pool(None)

    # The profile wins for openrouter; global supplies anthropic.
    openrouter_ids = [entry["id"] for entry in merged["openrouter"]]
    anthropic_ids = [entry["id"] for entry in merged["anthropic"]]
    assert openrouter_ids == ["prof-or"]
    assert anthropic_ids == ["glob-ant"]
# ---------------------------------------------------------------------------
# get_provider_auth_state — singleton fallback
# ---------------------------------------------------------------------------
def test_provider_auth_state_falls_back_to_global_when_profile_has_none(profile_env):
    """With no nous entry in the profile, the global-root state is returned."""
    from hermes_cli.auth import get_provider_auth_state

    global_providers = {
        "nous": {"access_token": "nous-global", "refresh_token": "rt-global"},
    }
    _write(
        profile_env["global"] / "auth.json",
        _make_auth_store(providers=global_providers),
    )
    _write(profile_env["profile"] / "auth.json", _make_auth_store(providers={}))

    nous_state = get_provider_auth_state("nous")

    assert nous_state is not None
    assert nous_state["access_token"] == "nous-global"
def test_provider_auth_state_profile_wins_when_present(profile_env):
    """When both stores hold nous state, the profile's copy is returned."""
    from hermes_cli.auth import get_provider_auth_state

    global_providers = {"nous": {"access_token": "nous-global"}}
    profile_providers = {"nous": {"access_token": "nous-profile"}}
    _write(
        profile_env["global"] / "auth.json",
        _make_auth_store(providers=global_providers),
    )
    _write(
        profile_env["profile"] / "auth.json",
        _make_auth_store(providers=profile_providers),
    )

    nous_state = get_provider_auth_state("nous")

    assert nous_state is not None
    assert nous_state["access_token"] == "nous-profile"
def test_provider_auth_state_returns_none_when_neither_has_it(profile_env):
    """With nous absent from both stores, the lookup returns None."""
    from hermes_cli.auth import get_provider_auth_state

    for scope in ("global", "profile"):
        _write(profile_env[scope] / "auth.json", _make_auth_store(providers={}))

    nous_state = get_provider_auth_state("nous")
    assert nous_state is None
# ---------------------------------------------------------------------------
# Classic mode — no fallback path should ever trigger
# ---------------------------------------------------------------------------
def test_classic_mode_does_not_double_read_same_file(tmp_path, monkeypatch):
    """In classic mode (HERMES_HOME == global root) no fallback path may run.

    Protects against the merge duplicating entries when the profile and the
    global root resolve to the same directory.
    """
    # Path.home() is placed under a subdirectory. Per the original note, the
    # seat belt in _auth_file_path() then treats tmp_path/home/.hermes as the
    # "real home", which differs from the HERMES_HOME used here
    # (tmp_path/classic), so the guard passes.
    home_stub = tmp_path / "home"
    home_stub.mkdir()
    monkeypatch.setattr(Path, "home", lambda: home_stub)

    classic_home = tmp_path / "classic"
    classic_home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(classic_home))

    classic_entry = {
        "id": "only",
        "label": "classic",
        "auth_type": "api_key",
        "priority": 0,
        "source": "manual",
        "access_token": "sk-classic",
    }
    _write(
        classic_home / "auth.json",
        _make_auth_store(pool={"openrouter": [classic_entry]}),
    )

    from hermes_cli.auth import _global_auth_file_path, read_credential_pool

    # Classic mode: HERMES_HOME is a custom path that is NOT under
    # ~/.hermes/profiles/. According to the original note,
    # get_default_hermes_root() returns HERMES_HOME itself, so the profile
    # root and global root coincide and the helper returns None (no fallback).
    assert _global_auth_file_path() is None

    # The read must produce exactly one entry, not a duplicated pair.
    found = read_credential_pool("openrouter")
    assert len(found) == 1
    assert found[0]["id"] == "only"
# ---------------------------------------------------------------------------
# Writes stay scoped to the profile
# ---------------------------------------------------------------------------
def test_write_credential_pool_targets_profile_not_global(profile_env):
    """Pool writes land in the profile's auth.json and leave global untouched."""
    from hermes_cli.auth import read_credential_pool, write_credential_pool

    global_auth_file = profile_env["global"] / "auth.json"
    profile_auth_file = profile_env["profile"] / "auth.json"

    global_entry = {
        "id": "glob-1",
        "label": "global",
        "auth_type": "api_key",
        "priority": 0,
        "source": "manual",
        "access_token": "sk-global",
    }
    _write(global_auth_file, _make_auth_store(pool={"openrouter": [global_entry]}))

    new_profile_entry = {
        "id": "prof-new",
        "label": "profile-new",
        "auth_type": "api_key",
        "priority": 0,
        "source": "manual",
        "access_token": "sk-profile-new",
    }
    write_credential_pool("openrouter", [new_profile_entry])

    # The global auth.json still holds its original entry.
    global_on_disk = json.loads(global_auth_file.read_text())
    assert global_on_disk["credential_pool"]["openrouter"][0]["id"] == "glob-1"

    # The profile auth.json received the new entry.
    profile_on_disk = json.loads(profile_auth_file.read_text())
    assert profile_on_disk["credential_pool"]["openrouter"][0]["id"] == "prof-new"

    # A follow-up read returns the profile entry, shadowing global.
    read_back_ids = [entry["id"] for entry in read_credential_pool("openrouter")]
    assert read_back_ids == ["prof-new"]

View file

@ -0,0 +1,202 @@
"""Regression tests for TOCTOU-safe credential file writers in ``hermes_cli.auth``.
Background
==========
The three writers below used to create a temp file via ``Path.write_text`` /
``Path.open('w')`` and only ``chmod``'d it to ``0o600`` afterward. Between
create and chmod the file existed at the process umask (typically ``0o644``),
briefly exposing OAuth tokens to other local users on multi-user hosts. The
fix switches them to ``os.open(O_EXCL, mode=0o600)`` + ``os.fdopen`` +
``fsync`` so the file is atomic at ``0o600`` on creation. Mirrors the fixes
shipped for ``agent/google_oauth.py`` (#19673) and ``tools/mcp_oauth.py``
(#21148).
These tests stay green only while the token file and its parent directory
end up at ``0o600`` / ``0o700`` after every write. POSIX-only — the mode-bit
enforcement does not exist on Windows.
"""
from __future__ import annotations
import json
import os
import stat
import sys
from unittest.mock import patch
import pytest
# Module-wide marker: skip every test in this file when sys.platform starts
# with "win", because the assertions below compare POSIX permission bits.
pytestmark = pytest.mark.skipif(
sys.platform.startswith("win"),
reason="POSIX mode bits not enforced on Windows",
)
# ---------------------------------------------------------------------------
# _save_auth_store (~/.hermes/auth.json — every native OAuth provider)
# ---------------------------------------------------------------------------
def test_save_auth_store_writes_0o600_with_0o700_parent(tmp_path, monkeypatch):
    """``_save_auth_store`` leaves ``auth.json`` at 0o600 inside a 0o700 directory."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    # A permissive umask makes a regression visible: a file created without
    # an explicit restricted mode would come out group/world readable.
    saved_umask = os.umask(0o022)
    try:
        from hermes_cli import auth as auth_mod

        auth_path = auth_mod._save_auth_store(
            {
                "version": auth_mod.AUTH_STORE_VERSION,
                "providers": {"openai-codex": {"tokens": {"access_token": "secret-x"}}},
                "active_provider": "openai-codex",
            }
        )
    finally:
        # Always restore the umask so later tests are unaffected.
        os.umask(saved_umask)

    mode = stat.S_IMODE(auth_path.stat().st_mode)
    assert mode == 0o600, (
        f"auth.json mode 0o{mode:o} != 0o600 — TOCTOU race regressed"
    )

    parent_mode = stat.S_IMODE(auth_path.parent.stat().st_mode)
    assert parent_mode == 0o700, (
        f"auth.json parent dir mode 0o{parent_mode:o} != 0o700 — siblings can traverse"
    )

    # The payload must round-trip through the rewritten file.
    persisted = json.loads(auth_path.read_text())
    assert persisted["providers"]["openai-codex"]["tokens"]["access_token"] == "secret-x"
# ---------------------------------------------------------------------------
# _save_qwen_cli_tokens (Qwen CLI OAuth tokens)
# ---------------------------------------------------------------------------
def test_save_qwen_cli_tokens_writes_0o600_with_0o700_parent(tmp_path, monkeypatch):
    """``_save_qwen_cli_tokens`` leaves the token file at 0o600 inside a 0o700 directory."""
    # Point both HERMES_HOME and HOME at tmp_path: the Qwen CLI auth path
    # lives under $HOME/.qwen by default, so HOME must be isolated as well.
    for env_name in ("HERMES_HOME", "HOME"):
        monkeypatch.setenv(env_name, str(tmp_path))

    saved_umask = os.umask(0o022)
    try:
        from hermes_cli import auth as auth_mod

        auth_path = auth_mod._save_qwen_cli_tokens(
            {
                "access_token": "qwen-secret",
                "refresh_token": "qwen-refresh",
                "token_type": "Bearer",
                "expiry_date": 123,
            }
        )
    finally:
        # Always restore the umask so later tests are unaffected.
        os.umask(saved_umask)

    mode = stat.S_IMODE(auth_path.stat().st_mode)
    assert mode == 0o600, (
        f"Qwen token file mode 0o{mode:o} != 0o600 — TOCTOU race regressed"
    )

    parent_mode = stat.S_IMODE(auth_path.parent.stat().st_mode)
    assert parent_mode == 0o700, (
        f"Qwen token parent dir mode 0o{parent_mode:o} != 0o700"
    )

    # The token payload must be readable back from the written file.
    persisted = json.loads(auth_path.read_text())
    assert persisted["access_token"] == "qwen-secret"
# ---------------------------------------------------------------------------
# Nous shared-credential store write (inside _write_shared_nous_state)
# ---------------------------------------------------------------------------
def test_shared_nous_store_writes_0o600_with_0o700_parent(tmp_path, monkeypatch):
    """``_write_shared_nous_state`` leaves the shared store at 0o600 / parent 0o700."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    # _nous_shared_store_path() refuses to touch the real shared store during
    # pytest runs, so it is redirected into tmp_path explicitly. The distinct
    # ``shared_override`` subdirectory keeps the override from colliding with
    # the guard's "real user home" reference, which currently tracks
    # HERMES_HOME via get_default_hermes_root(); a collision would make the
    # guard falsely claim the real user's shared store is being written.
    override_dir = tmp_path / "shared_override"
    monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(override_dir))

    saved_umask = os.umask(0o022)
    try:
        from hermes_cli import auth as auth_mod

        auth_mod._write_shared_nous_state(
            {
                "access_token": "nous-access-xxx",
                "refresh_token": "nous-refresh-xxx",
                "token_type": "Bearer",
                "scope": "openid profile",
                "client_id": "test-client",
                "obtained_at": "2026-01-01T00:00:00Z",
                "expires_at": "2026-01-01T01:00:00Z",
            }
        )
        path = auth_mod._nous_shared_store_path()
    finally:
        # Always restore the umask so later tests are unaffected.
        os.umask(saved_umask)

    assert path.exists(), "shared Nous store was not written"

    mode = stat.S_IMODE(path.stat().st_mode)
    assert mode == 0o600, (
        f"Nous shared store mode 0o{mode:o} != 0o600 — TOCTOU race regressed"
    )

    parent_mode = stat.S_IMODE(path.parent.stat().st_mode)
    assert parent_mode == 0o700, (
        f"Nous shared store parent dir mode 0o{parent_mode:o} != 0o700"
    )

    # The state payload must be readable back from the written file.
    persisted = json.loads(path.read_text())
    assert persisted["refresh_token"] == "nous-refresh-xxx"
# ---------------------------------------------------------------------------
# Atomicity: verify ``os.open`` is called with an explicit 0o600 mode.
# ---------------------------------------------------------------------------
def test_save_auth_store_uses_os_open_with_0o600_mode(tmp_path, monkeypatch):
    """Regression: ``_save_auth_store`` must create its temp file via ``os.open``
    with ``O_CREAT | O_EXCL`` and an explicit 0o600 mode.

    Creating the file with a restricted mode closes the TOCTOU window the
    previous ``Path.open('w')`` left open (the fd inherited the process umask
    and the file was briefly 0o644 before the post-write chmod).

    ``os.open`` is wrapped with a spy that records ``(path, flags, mode)`` for
    every call and then delegates to the real implementation, so the write
    itself still happens.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    observed_opens: list[tuple[str, int, int]] = []
    real_os_open = os.open

    def spying_os_open(path, flags, mode=0o777, *args, **kwargs):
        # 0o777 mirrors os.open's own default so an omitted mode is recorded
        # as "no restriction requested".
        observed_opens.append((str(path), flags, mode))
        return real_os_open(path, flags, mode, *args, **kwargs)

    with patch.object(os, "open", spying_os_open):
        from hermes_cli import auth as auth_mod

        auth_mod._save_auth_store(
            {"version": auth_mod.AUTH_STORE_VERSION, "providers": {}}
        )

    # Only the opens that target the auth.json temp file are relevant.
    auth_tmp_opens = [
        (p, fl, m) for (p, fl, m) in observed_opens if "auth.json.tmp" in p
    ]
    assert auth_tmp_opens, (
        "os.open was never called for the auth.json temp file; "
        f"observed={observed_opens!r}"
    )

    # Must be exactly S_IRUSR | S_IWUSR (0o600) — no group/other bits.
    expected = stat.S_IRUSR | stat.S_IWUSR
    for path, flags, mode in auth_tmp_opens:
        assert flags & os.O_CREAT, f"auth.json temp open missing O_CREAT: path={path}"
        assert flags & os.O_EXCL, (
            f"auth.json temp open missing O_EXCL — TOCTOU-safe pattern regressed: "
            f"path={path}, flags={flags}"
        )
        assert mode == expected, (
            f"auth.json temp open mode 0o{mode:o} != 0o{expected:o} — "
            "umask would apply and potentially expose tokens"
        )

View file

@ -471,6 +471,32 @@ class TestImport:
with pytest.raises(SystemExit):
run_import(args)
@pytest.mark.skipif(os.name != "posix", reason="POSIX file permissions only")
def test_restores_secret_files_with_0600_perms(self, tmp_path, monkeypatch):
    """Restored secret files end up at 0600 (zipfile drops mode bits).

    A backup archive containing config, env, auth, database and per-profile
    env files is imported with ``force=True``; every secret-bearing member is
    then checked for owner-only permissions.
    """
    home_root = tmp_path / ".hermes"
    home_root.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home_root))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    archive = tmp_path / "backup.zip"
    archive_members = {
        "config.yaml": "model: openrouter\n",
        ".env": "OPENROUTER_API_KEY=sk-secret\n",
        "auth.json": '{"providers": {"nous": "token"}}',
        "state.db": b"SQLite format 3\x00",
        "profiles/coder/.env": "ANTHROPIC_API_KEY=sk-ant-secret\n",
    }
    self._make_backup_zip(archive, archive_members)

    import_args = Namespace(zipfile=str(archive), force=True)
    from hermes_cli.backup import run_import

    run_import(import_args)

    # config.yaml is deliberately not checked; only the secret files are.
    secret_members = (".env", "auth.json", "state.db", "profiles/coder/.env")
    for rel in secret_members:
        mode = (home_root / rel).stat().st_mode & 0o777
        assert mode == 0o600, f"{rel} restored with mode {oct(mode)}, expected 0o600"
# ---------------------------------------------------------------------------
# Round-trip test
@ -1348,6 +1374,53 @@ class TestPreUpdateBackup:
from hermes_cli.backup import create_pre_update_backup
assert create_pre_update_backup(hermes_home=tmp_path / "does-not-exist") is None
def test_keep_zero_does_not_delete_freshly_created_backup(self, hermes_home):
    """Regression: ``keep=0`` must not delete the backup that was just written.

    ``backup_keep: 0`` previously triggered ``backups[0:]`` in the pruner,
    wiping the just-created zip and leaving the user with no recovery point.
    The floor (keep>=1) preserves the new file regardless of
    misconfiguration; users who don't want backups should set
    ``pre_update_backup: false`` instead.
    """
    from hermes_cli.backup import create_pre_update_backup

    backup_path = create_pre_update_backup(hermes_home=hermes_home, keep=0)

    assert backup_path is not None
    assert backup_path.exists(), (
        "keep=0 silently deleted the freshly-created backup; floor "
        "should preserve the just-written file."
    )
def test_keep_negative_does_not_delete_freshly_created_backup(self, hermes_home):
    """Mirror of the ``keep=0`` case: a negative ``keep`` must be floored
    rather than applied literally as a slice index."""
    from hermes_cli.backup import create_pre_update_backup

    backup_path = create_pre_update_backup(hermes_home=hermes_home, keep=-3)

    assert backup_path is not None
    assert backup_path.exists()
def test_keep_zero_still_prunes_older_backups(self, hermes_home):
    """The keep>=1 floor preserves the new backup without disabling rotation.

    Two backups are created with ``keep=5``; a third call with ``keep=0``
    must keep its own output and still remove the two older zips, because
    the floored limit is 1.
    """
    import time as _t
    from hermes_cli.backup import create_pre_update_backup

    # create_pre_update_backup may return None instead of a path; check each
    # result so a failed backup surfaces as a clear assertion instead of an
    # AttributeError on ``.name`` further down.
    first = create_pre_update_backup(hermes_home=hermes_home, keep=5)
    assert first is not None, "first backup was not created"

    # NOTE(review): the pause presumably guarantees distinct second-resolution
    # backup names -- confirm against create_pre_update_backup.
    _t.sleep(1.05)
    second = create_pre_update_backup(hermes_home=hermes_home, keep=5)
    assert second is not None, "second backup was not created"

    _t.sleep(1.05)
    third = create_pre_update_backup(hermes_home=hermes_home, keep=0)
    assert third is not None, "third backup (keep=0) was not created"

    remaining = {
        p.name for p in (hermes_home / "backups").iterdir()
        if p.name.startswith("pre-update-")
    }
    assert third.name in remaining, "Floor must preserve the new backup"
    assert first.name not in remaining and second.name not in remaining, (
        "keep=0 floor of 1 should still prune older backups; "
        f"remaining={remaining}"
    )
class TestRunPreUpdateBackup:
"""Tests for the ``_run_pre_update_backup`` wrapper in main.py —

View file

@ -203,6 +203,30 @@ class TestListAuthenticatedProvidersBedrock:
bedrock = next((p for p in providers if p["slug"] == "bedrock"), None)
assert bedrock is None, "bedrock should NOT appear when AWS credentials are absent"
def test_non_bedrock_picker_does_not_probe_full_aws_chain(self, monkeypatch):
    """Listing providers for a non-Bedrock provider must not call
    ``has_aws_credentials`` (i.e. must not touch boto3's full credential chain)."""
    from hermes_cli.model_switch import list_authenticated_providers

    # Clear every AWS credential hint from the environment first.
    aws_env_vars = (
        "AWS_PROFILE",
        "AWS_ACCESS_KEY_ID",
        "AWS_SECRET_ACCESS_KEY",
        "AWS_BEARER_TOKEN_BEDROCK",
        "AWS_WEB_IDENTITY_TOKEN_FILE",
        "AWS_CONTAINER_CREDENTIALS_RELATIVE_URI",
        "AWS_CONTAINER_CREDENTIALS_FULL_URI",
    )
    for var_name in aws_env_vars:
        monkeypatch.delenv(var_name, raising=False)

    calls = {"has_aws_credentials": 0}

    def _has_aws_credentials():
        # Count invocations so the test can prove the probe never ran.
        calls["has_aws_credentials"] += 1
        return False

    with patch("agent.bedrock_adapter.has_aws_credentials", side_effect=_has_aws_credentials):
        providers = list_authenticated_providers(current_provider="openrouter", max_models=0)

    assert calls["has_aws_credentials"] == 0
    assert all(p["slug"] != "bedrock" for p in providers)
def test_bedrock_falls_back_to_curated_when_discovery_fails(self, monkeypatch):
"""When discover_bedrock_models() raises, fall back to curated list without crashing."""
from hermes_cli.model_switch import list_authenticated_providers

View file

@ -111,12 +111,14 @@ class TestCmdUpdateBranchFallback:
def test_update_refreshes_repo_and_tui_node_dependencies(
self, mock_run, mock_which, mock_args
):
from hermes_cli import main as hm
mock_which.side_effect = {"uv": "/usr/bin/uv", "npm": "/usr/bin/npm"}.get
mock_run.side_effect = _make_run_side_effect(
branch="main", verify_ok=True, commit_count="1"
)
cmd_update(mock_args)
with patch.object(hm, "_is_termux_env", return_value=False):
cmd_update(mock_args)
npm_calls = [
(call.args[0], call.kwargs.get("cwd"))
@ -136,21 +138,28 @@ class TestCmdUpdateBranchFallback:
"--no-audit",
"--progress=false",
]
assert npm_calls == [
assert npm_calls[:2] == [
(full_flags, PROJECT_ROOT),
(full_flags, PROJECT_ROOT / "ui-tui"),
(["/usr/bin/npm", "ci", "--silent"], PROJECT_ROOT / "web"),
(["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "web"),
]
if len(npm_calls) > 2:
assert npm_calls[2:] == [
(["/usr/bin/npm", "ci", "--silent"], PROJECT_ROOT / "web"),
(["/usr/bin/npm", "run", "build"], PROJECT_ROOT / "web"),
]
def test_update_non_interactive_skips_migration_prompt(self, mock_args, capsys):
"""When stdin/stdout aren't TTYs, config migration prompt is skipped."""
def test_update_non_interactive_runs_safe_config_migrations(self, mock_args, capsys):
"""Dashboard/web updates apply non-interactive migrations before restart."""
with patch("shutil.which", return_value=None), patch(
"subprocess.run"
) as mock_run, patch("builtins.input") as mock_input, patch(
"hermes_cli.config.get_missing_env_vars", return_value=["MISSING_KEY"]
), patch("hermes_cli.config.get_missing_config_fields", return_value=[]), patch(
"hermes_cli.config.check_config_version", return_value=(1, 2)
), patch(
"hermes_cli.config.get_missing_config_fields",
return_value=[{"key": "new.option", "default": True}],
), patch("hermes_cli.config.check_config_version", return_value=(1, 2)), patch(
"hermes_cli.config.migrate_config",
return_value={"env_added": [], "config_added": ["new.option"]},
), patch("hermes_cli.main.sys") as mock_sys:
mock_sys.stdin.isatty.return_value = False
mock_sys.stdout.isatty.return_value = False
@ -161,5 +170,119 @@ class TestCmdUpdateBranchFallback:
cmd_update(mock_args)
mock_input.assert_not_called()
from hermes_cli.config import migrate_config
migrate_config.assert_called_once_with(interactive=False, quiet=False)
captured = capsys.readouterr()
assert "Non-interactive session" in captured.out
assert "applying safe config migrations" in captured.out
assert "API keys require manual entry" in captured.out
class TestCmdUpdateProfileSkillSync:
    """cmd_update syncs bundled skills to all profiles, including the active one.

    Regression guard for #16176: previously the active profile was excluded
    from the seed_profile_skills loop, leaving it on stale skill content.
    """

    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_active_profile_included_in_skill_sync(
        self, mock_run, _mock_which, mock_args, capsys
    ):
        """Every listed profile, the active one included, gets seeded."""
        from pathlib import Path

        mock_run.side_effect = _make_run_side_effect(
            branch="main", verify_ok=True, commit_count="1"
        )

        all_profiles = [
            SimpleNamespace(name="default", path=Path("/fake/.hermes")),
            SimpleNamespace(name="bit", path=Path("/fake/.hermes/profiles/bit")),
            SimpleNamespace(name="work", path=Path("/fake/.hermes/profiles/work")),
        ]
        active_p = all_profiles[1]

        synced_paths = []

        def record_seed(path, quiet=False):
            # Stand-in for seed_profile_skills: remember which path was seeded.
            synced_paths.append(path)
            return {"copied": [], "updated": [], "user_modified": []}

        no_changes = {"copied": [], "updated": [], "user_modified": [], "cleaned": []}

        with (
            patch("hermes_cli.profiles.list_profiles", return_value=all_profiles),
            patch("hermes_cli.profiles.seed_profile_skills", side_effect=record_seed),
            patch("tools.skills_sync.sync_skills", return_value=no_changes),
        ):
            cmd_update(mock_args)

        assert active_p.path in synced_paths, (
            f"Active profile 'bit' must be included in skill sync; got: {synced_paths}"
        )
        expected_paths = {profile.path for profile in all_profiles}
        assert set(synced_paths) == expected_paths, (
            f"All profiles must be synced; got: {synced_paths}"
        )

    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_single_profile_default_is_synced(
        self, mock_run, _mock_which, mock_args, capsys
    ):
        """With only the default profile present, that profile is still seeded."""
        from pathlib import Path

        mock_run.side_effect = _make_run_side_effect(
            branch="main", verify_ok=True, commit_count="1"
        )

        default_p = SimpleNamespace(name="default", path=Path("/fake/.hermes"))
        synced_paths = []

        def record_seed(path, quiet=False):
            # Stand-in for seed_profile_skills: remember which path was seeded.
            synced_paths.append(path)
            return {"copied": [], "updated": [], "user_modified": []}

        no_changes = {"copied": [], "updated": [], "user_modified": [], "cleaned": []}

        with (
            patch("hermes_cli.profiles.list_profiles", return_value=[default_p]),
            patch("hermes_cli.profiles.seed_profile_skills", side_effect=record_seed),
            patch("tools.skills_sync.sync_skills", return_value=no_changes),
        ):
            cmd_update(mock_args)

        assert default_p.path in synced_paths
def test_is_termux_env_true_for_termux_prefix():
    """A ``PREFIX`` under ``com.termux`` is reported as a Termux environment."""
    from hermes_cli import main as hm

    termux_env = {"PREFIX": "/data/data/com.termux/files/usr"}
    assert hm._is_termux_env(termux_env) is True
def test_is_termux_env_false_for_non_termux_prefix():
    """An ordinary ``PREFIX`` is not reported as a Termux environment."""
    from hermes_cli import main as hm

    regular_env = {"PREFIX": "/usr/local"}
    assert hm._is_termux_env(regular_env) is False
def test_load_installable_optional_extras_supports_termux_group(tmp_path, monkeypatch):
    """``_load_installable_optional_extras`` resolves both the ``all`` and the
    ``termux-all`` groups from a ``pyproject.toml`` under ``PROJECT_ROOT``."""
    from hermes_cli import main as hm

    # Minimal pyproject: two aggregate groups that reference the leaf extras.
    manifest_text = """
[project]
name = "x"
version = "0.0.0"
[project.optional-dependencies]
all = ["x[mcp]"]
termux-all = ["x[termux]", "x[mcp]"]
mcp = ["mcp>=1"]
termux = ["rich>=14"]
""".strip()
    (tmp_path / "pyproject.toml").write_text(manifest_text)

    # Make the loader read the synthetic manifest instead of the real one.
    monkeypatch.setattr(hm, "PROJECT_ROOT", tmp_path)

    default_extras = hm._load_installable_optional_extras(group="all")
    assert default_extras == ["mcp"]

    termux_extras = hm._load_installable_optional_extras(group="termux-all")
    assert termux_extras == ["termux", "mcp"]

View file

@ -75,6 +75,37 @@ def test_normal_path_still_works(hermes_auth_only_env):
assert "openai-codex" in slugs
def test_codex_picker_uses_live_codex_catalog(hermes_auth_only_env, tmp_path, monkeypatch):
    """The gateway /model picker should surface Codex CLI-only listed models.

    A ``models_cache.json`` under ``CODEX_HOME`` lists one model flagged
    ``supported_in_api: False``; that model must still appear in the
    ``openai-codex`` provider entry.
    """
    from hermes_cli.model_switch import list_authenticated_providers

    codex_home = tmp_path / "codex-home"
    codex_home.mkdir()
    (codex_home / "models_cache.json").write_text(json.dumps({
        "models": [
            {"slug": "gpt-5.5", "priority": 0, "supported_in_api": True},
            {"slug": "gpt-5.3-codex-spark", "priority": 7, "supported_in_api": False},
        ]
    }))
    monkeypatch.setenv("CODEX_HOME", str(codex_home))

    # Force the cache fallback path — without this the test issues a real
    # 10s HTTP probe to chatgpt.com/backend-api/codex/models which is both
    # slow and non-deterministic in CI/sandboxed environments.
    monkeypatch.setattr(
        "hermes_cli.codex_models._fetch_models_from_api",
        lambda access_token: [],
    )

    providers = list_authenticated_providers(
        current_provider="openai-codex",
        max_models=10,
    )

    # Use a default with next() so a missing provider fails as a readable
    # assertion rather than a bare StopIteration.
    codex = next((p for p in providers if p["slug"] == "openai-codex"), None)
    assert codex is not None, (
        f"openai-codex missing from picker; got slugs: {[p['slug'] for p in providers]}"
    )
    assert "gpt-5.3-codex-spark" in codex["models"]
    assert codex["total_models"] == len(codex["models"])
@pytest.fixture()
def claude_code_only_env(tmp_path, monkeypatch):
"""Set up an environment where Anthropic credentials only exist in

View file

@ -1,10 +1,6 @@
import json
import os
import sys
from unittest.mock import patch
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from hermes_cli.codex_models import DEFAULT_CODEX_MODELS, get_codex_model_ids
@ -17,6 +13,7 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch
{
"models": [
{"slug": "gpt-5.3-codex", "priority": 20, "supported_in_api": True},
{"slug": "gpt-5.3-codex-spark", "priority": 6, "supported_in_api": False},
{"slug": "gpt-5.1-codex", "priority": 5, "supported_in_api": True},
{"slug": "gpt-5.4", "priority": 1, "supported_in_api": True},
{"slug": "gpt-5-hidden-codex", "priority": 2, "visibility": "hidden"},
@ -31,6 +28,9 @@ def test_get_codex_model_ids_prioritizes_default_and_cache(tmp_path, monkeypatch
assert models[0] == "gpt-5.2-codex"
assert "gpt-5.1-codex" in models
assert "gpt-5.3-codex" in models
# Codex CLI marks Spark unsupported in the public API, but the Codex
# backend still accepts it via the OAuth-backed CLI/Hermes route.
assert "gpt-5.3-codex-spark" in models
# Non-codex-suffixed models are included when the cache says they're available
assert "gpt-5.4" in models
assert "gpt-5.4-mini" in models
@ -54,7 +54,7 @@ def test_get_codex_model_ids_falls_back_to_curated_defaults(tmp_path, monkeypatc
assert models[: len(DEFAULT_CODEX_MODELS)] == DEFAULT_CODEX_MODELS
assert "gpt-5.4" in models
assert "gpt-5.3-codex-spark" not in models
assert "gpt-5.3-codex-spark" in models
def test_get_codex_model_ids_adds_forward_compat_models_from_templates(monkeypatch):
@ -65,7 +65,49 @@ def test_get_codex_model_ids_adds_forward_compat_models_from_templates(monkeypat
models = get_codex_model_ids(access_token="codex-access-token")
assert models == ["gpt-5.2-codex", "gpt-5.4-mini", "gpt-5.4", "gpt-5.3-codex"]
assert models == [
"gpt-5.2-codex",
"gpt-5.4-mini",
"gpt-5.4",
"gpt-5.3-codex",
"gpt-5.3-codex-spark",
]
def test_fetch_from_api_keeps_supported_in_api_false_models(monkeypatch):
    """Regression: ``supported_in_api: false`` must not filter a model out.

    gpt-5.3-codex-spark is returned by the live Codex backend with
    ``supported_in_api: false`` because it isn't in the public OpenAI API.
    The Codex CLI / OAuth route still serves it for ChatGPT Pro accounts, so
    it must not be dropped on that flag. ``visibility=hidden`` is the
    separate signal that *should* still filter entries out.
    """
    import sys
    from hermes_cli import codex_models

    catalog = {
        "models": [
            {"slug": "gpt-5.5", "priority": 0, "supported_in_api": True},
            {"slug": "gpt-5.3-codex-spark", "priority": 7, "supported_in_api": False},
            {"slug": "gpt-5-internal", "priority": 99, "visibility": "hidden"},
        ]
    }

    class _StubResponse:
        # Minimal response double: a 200 status plus a JSON body.
        status_code = 200

        def json(self):
            return catalog

    class _StubHttpx:
        # Replaces the httpx module so no real request is sent.
        @staticmethod
        def get(url, headers=None, timeout=None):
            return _StubResponse()

    monkeypatch.setitem(sys.modules, "httpx", _StubHttpx)

    models = codex_models._fetch_models_from_api(access_token="tok")

    for kept_slug in ("gpt-5.5", "gpt-5.3-codex-spark"):
        assert kept_slug in models
    assert "gpt-5-internal" not in models
def test_model_command_uses_runtime_access_token_for_codex_list(monkeypatch):

View file

@ -13,6 +13,7 @@ from hermes_cli.commands import (
SlashCommandAutoSuggest,
SlashCommandCompleter,
_CMD_NAME_LIMIT,
_SLACK_RESERVED_COMMANDS,
_TG_NAME_LIMIT,
_clamp_command_names,
_clamp_telegram_names,
@ -108,6 +109,12 @@ class TestResolveCommand:
assert resolve_command("reload_mcp").name == "reload-mcp"
assert resolve_command("tasks").name == "agents"
def test_topic_is_gateway_command(self):
    """``topic`` resolves to a command named ``topic`` and is known to the gateway."""
    resolved = resolve_command("topic")

    assert resolved is not None
    assert resolved.name == "topic"
    assert "topic" in GATEWAY_KNOWN_COMMANDS
def test_leading_slash_stripped(self):
assert resolve_command("/help").name == "help"
assert resolve_command("/bg").name == "background"
@ -235,6 +242,13 @@ class TestTelegramBotCommands:
tg_name = cmd.name.replace("-", "_")
assert tg_name not in names
def test_excludes_commands_with_required_args(self):
    """``background``, ``queue`` and ``steer`` are absent from the Telegram
    bot command list, while ``background`` stays a known gateway command."""
    listed = {cmd_name for cmd_name, _desc in telegram_bot_commands()}

    for needs_args in ("background", "queue", "steer"):
        assert needs_args not in listed
    assert "background" in GATEWAY_KNOWN_COMMANDS
class TestSlackSubcommandMap:
def test_returns_dict(self):
@ -299,9 +313,19 @@ class TestSlackNativeSlashes:
def test_includes_canonical_commands(self):
names = {n for n, _d, _h in slack_native_slashes()}
# Sample of gateway-available canonical commands
for expected in ("new", "stop", "background", "model", "help", "status"):
for expected in ("new", "stop", "background", "model", "help"):
assert expected in names, f"missing canonical /{expected}"
def test_excludes_slack_reserved_commands(self):
    """No Slack built-in command name may appear among the native slashes.

    Slack built-ins (e.g. /status, /me, /join) cannot be registered by apps
    and must be excluded from the manifest. Users can still reach them via
    /hermes <command>.
    """
    registered = {slash for slash, _desc, _hint in slack_native_slashes()}

    for reserved in _SLACK_RESERVED_COMMANDS:
        assert reserved not in registered, (
            f"/{reserved} is a Slack built-in and must not appear in the manifest"
        )
def test_includes_aliases_as_first_class_slashes(self):
"""Aliases (/btw, /bg, /reset, /q) must be registered as standalone
slashes this is the whole point of native-slashes parity."""
@ -319,6 +343,9 @@ class TestSlackNativeSlashes:
Telegram but not Slack (because of Slack's 50-slash cap), this
test fails loudly so we can curate the list rather than silently
dropping parity.
Slack-reserved built-in commands (e.g. /status) are excluded
from parity checks since they cannot be registered on Slack.
"""
slack_names = {n for n, _d, _h in slack_native_slashes()}
tg_names = {n for n, _d in telegram_bot_commands()}
@ -329,7 +356,8 @@ class TestSlackNativeSlashes:
slack_norm = {_norm(n) for n in slack_names}
tg_norm = {_norm(n) for n in tg_names}
missing = tg_norm - slack_norm
reserved_norm = {_norm(n) for n in _SLACK_RESERVED_COMMANDS}
missing = (tg_norm - slack_norm) - reserved_norm
assert not missing, (
f"commands on Telegram but missing from Slack native slashes: {sorted(missing)}"
)
@ -405,6 +433,21 @@ class TestGatewayConfigGate:
joined = "\n".join(lines)
assert "`/verbose" in joined
def test_config_gate_quoted_false_stays_disabled_everywhere(self, tmp_path, monkeypatch):
    """A quoted ``"false"`` must not enable a config-gated gateway command.

    With ``display.tool_progress_command`` set to the string ``"false"``,
    ``/verbose`` must be missing from the help text, the Telegram command
    list and the Slack subcommand map alike.
    """
    (tmp_path / "config.yaml").write_text(
        'display:\n tool_progress_command: "false"\n'
    )
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    help_text = "\n".join(gateway_help_lines())
    telegram_names = {cmd_name for cmd_name, _desc in telegram_bot_commands()}
    slack_map = slack_subcommand_map()

    assert "`/verbose" not in help_text
    assert "verbose" not in telegram_names
    assert "verbose" not in slack_map
def test_config_gate_excluded_from_telegram_when_off(self, tmp_path, monkeypatch):
config_file = tmp_path / "config.yaml"
config_file.write_text("display:\n tool_progress_command: false\n")
@ -792,6 +835,103 @@ class TestClampTelegramNames:
assert result[0] == ("foo", "d1")
class TestClampCommandNamesTriples:
    """Tests for _clamp_command_names with 3-tuples (name, desc, cmd_key).

    Skill entries pass through _clamp_command_names as 3-tuples so the
    original cmd_key survives name truncation. Before the fix in PR #18951,
    the code stripped cmd_key into a side-dict keyed by the *original*
    (name, desc) pair — after truncation the lookup key no longer matched,
    silently losing the cmd_key.
    """

    def test_short_triple_preserved(self):
        """A triple whose name needs no clamping comes back unchanged."""
        triple = ("skill", "A skill", "/skill")
        assert _clamp_command_names([triple], set()) == [triple]

    def test_long_name_preserves_cmd_key(self):
        """Clamping a 50-char name shortens the name but keeps cmd_key intact."""
        long_name = "a" * 50
        cmd_key = f"/{long_name}"

        clamped = _clamp_command_names([(long_name, "desc", cmd_key)], set())

        assert len(clamped) == 1
        name, desc, key = clamped[0]
        assert len(name) == _CMD_NAME_LIMIT
        assert key == cmd_key, "cmd_key must survive name clamping"

    def test_collision_preserves_cmd_key(self):
        """When the clamped name is already reserved, the name ends in ``0``
        and cmd_key is still carried through."""
        taken = "x" * _CMD_NAME_LIMIT
        long_name = "x" * 50

        clamped = _clamp_command_names(
            [(long_name, "desc", "/long-skill")], reserved={taken},
        )

        assert len(clamped) == 1
        name, _desc, key = clamped[0]
        assert name == "x" * (_CMD_NAME_LIMIT - 1) + "0"
        assert key == "/long-skill"

    def test_multiple_long_names_preserve_respective_keys(self):
        """Two over-long names sharing a prefix each keep their own cmd_key."""
        shared_prefix = "y" * 40
        triples = [
            (shared_prefix + "_alpha", "d1", "/alpha-skill"),
            (shared_prefix + "_beta", "d2", "/beta-skill"),
        ]

        clamped = _clamp_command_names(triples, set())

        assert len(clamped) == 2
        assert clamped[0][2] == "/alpha-skill"
        assert clamped[1][2] == "/beta-skill"

    def test_backward_compat_with_pairs(self):
        """Legacy 2-tuple callers (Telegram) must still work."""
        pairs = [("help", "Show help"), ("status", "Show status")]
        assert _clamp_command_names(pairs, set()) == pairs
class TestDiscordSkillCmdKeyDispatch:
    """Integration: discord_skill_commands preserves cmd_key for long names.

    This tests the full pipeline: skill_commands →
    _collect_gateway_skill_entries → _clamp_command_names → returned triples,
    verifying that skills with names exceeding Discord's 32-char limit still
    have their original cmd_key for dispatch.
    """

    def test_long_skill_name_retains_cmd_key(self, tmp_path, monkeypatch):
        """A skill with an over-long name is clamped but keeps its cmd_key."""
        from unittest.mock import patch

        long_name = "this-is-a-very-long-skill-name-that-exceeds-limit"
        cmd_key = f"/{long_name}"

        skills_root = tmp_path / "skills"
        skills_root.mkdir(exist_ok=True)
        # Use resolved path — macOS /var → /private/var symlink
        # causes SKILLS_DIR.resolve() to differ from tmp_path.
        resolved_root = str(skills_root.resolve())

        skill_record = {
            "name": long_name,
            "description": "A skill with a long name",
            "skill_md_path": f"{resolved_root}/{long_name}/SKILL.md",
            "skill_dir": f"{resolved_root}/{long_name}",
        }
        fake_cmds = {cmd_key: skill_record}

        with (
            patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
            patch("tools.skills_tool.SKILLS_DIR", skills_root),
            patch("agent.skill_utils.get_external_skills_dirs", return_value=[]),
        ):
            entries, _hidden = discord_skill_commands(
                max_slots=100, reserved_names=set(),
            )

        assert len(entries) == 1
        name, desc, key = entries[0]
        assert len(name) <= _CMD_NAME_LIMIT, "Name should be clamped to 32 chars"
        assert key == cmd_key, (
            f"cmd_key must be the original /{long_name}, got {key!r}"
        )
class TestTelegramMenuCommands:
"""Integration: telegram_menu_commands enforces the 32-char limit."""
@ -869,6 +1009,73 @@ class TestTelegramMenuCommands:
assert "my_enabled_skill" in menu_names
assert "my_disabled_skill" not in menu_names
def test_external_dir_skills_included_in_telegram_menu(self, tmp_path, monkeypatch):
    """External skills (``skills.external_dirs``) must appear in the Telegram menu.

    Regression test for #8110 — external skills were visible to the
    agent and CLI but silently excluded from gateway slash menus
    because ``_collect_gateway_skill_entries`` only accepted skills
    whose path started with ``SKILLS_DIR``.

    Also verifies the trailing-slash boundary: a directory that
    simply shares a prefix with a configured ``external_dirs`` entry
    (``/tmp/my-skills-extra`` vs ``/tmp/my-skills``) must NOT be
    admitted.
    """
    from unittest.mock import patch

    # Three sibling roots: the built-in skills dir, a configured external
    # dir, and a look-alike whose name merely starts with the external one.
    local_dir = tmp_path / "skills"
    local_dir.mkdir()
    external_dir = tmp_path / "my-skills"
    external_dir.mkdir()
    lookalike_dir = tmp_path / "my-skills-extra"
    lookalike_dir.mkdir()

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    (tmp_path / "config.yaml").write_text(
        f"skills:\n external_dirs:\n - {external_dir}\n"
    )

    def _skill_record(root, skill_name, description):
        # Build one get_skill_commands() entry rooted at ``root``.
        return {
            "name": skill_name,
            "description": description,
            "skill_md_path": f"{root}/{skill_name}/SKILL.md",
            "skill_dir": f"{root}/{skill_name}",
        }

    fake_cmds = {
        "/local-one": _skill_record(local_dir, "local-one", "Local"),
        "/morning-briefing": _skill_record(
            external_dir, "morning-briefing", "External skill"
        ),
        "/lookalike-skill": _skill_record(
            lookalike_dir,
            "lookalike-skill",
            "Lives in a sibling dir that shares a prefix",
        ),
    }

    with (
        patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds),
        patch("tools.skills_tool.SKILLS_DIR", local_dir),
        patch(
            "agent.skill_utils.get_external_skills_dirs",
            return_value=[external_dir],
        ),
    ):
        menu, _ = telegram_menu_commands(max_commands=100)

    menu_names = {n for n, _ in menu}
    assert "local_one" in menu_names, "local skill must appear"
    assert "morning_briefing" in menu_names, (
        "external skill from skills.external_dirs must appear (fixes #8110)"
    )
    assert "lookalike_skill" not in menu_names, (
        "prefix-match sibling directories must not be admitted"
    )
def test_special_chars_in_skill_names_sanitized(self, tmp_path, monkeypatch):
"""Skills with +, /, or other special chars produce valid Telegram names."""
from unittest.mock import patch
@ -1323,6 +1530,119 @@ class TestDiscordSkillCommandsByCategory:
assert "vllm" in names
assert len(uncategorized) == 0
def test_no_legacy_25x25_cap(self, tmp_path, monkeypatch):
    """All categories and all skills survive; the old 25 x 25 caps are gone.

    The live caller flattens categories into a single autocomplete list,
    which Discord fetches dynamically — the per-command 8KB payload
    concern from the old nested layout (#11321, #10259) no longer applies.

    Guards against accidentally re-introducing the caps, which would
    silently drop skills in the 26th+ alphabetical category (the exact
    failure mode users were hitting with 29 category dirs on real
    installs).
    """
    from unittest.mock import patch

    skills_root = tmp_path / "skills"
    skills_root_str = str(skills_root)

    # 30 categories (> old _MAX_GROUPS=25), each holding 30 skills
    # (> old _MAX_PER_GROUP=25).
    fake_cmds = {}
    for cat_idx in range(30):
        category = f"cat{cat_idx:02d}"  # cat00 .. cat29
        for skill_idx in range(30):
            skill_name = f"skill-{cat_idx:02d}-{skill_idx:02d}"
            skill_home = skills_root / category / skill_name
            skill_home.mkdir(parents=True, exist_ok=True)
            (skill_home / "SKILL.md").write_text("---\nname: x\n---\n")
            fake_cmds[f"/{skill_name}"] = {
                "name": skill_name,
                "description": f"Category {category} skill {skill_idx}",
                "skill_md_path": f"{skills_root_str}/{category}/{skill_name}/SKILL.md",
            }

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    with patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds), \
            patch("tools.skills_tool.SKILLS_DIR", skills_root):
        categories, _uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # No 25-group cap: every category is reported.
    assert len(categories) == 30, (
        f"expected all 30 categories, got {len(categories)} "
        f"(cap from old nested layout must be removed)"
    )
    # No 25-per-group cap: every category keeps all of its skills.
    for cat_name, entries in categories.items():
        assert len(entries) == 30, (
            f"category {cat_name}: expected 30 skills, got {len(entries)} "
            f"(cap from old nested layout must be removed)"
        )
    # The only legitimate "hidden" reason left is a name-clamp collision,
    # which cannot happen here because every name is unique.
    assert hidden == 0
def test_external_dirs_skills_included(self, tmp_path, monkeypatch):
    """Skills under ``skills.external_dirs`` must show up in /skill autocomplete.

    #18741 fixed this for the flat ``discord_skill_commands`` collector
    but left ``discord_skill_commands_by_category`` (the live caller for
    Discord's ``/skill`` command) still filtering by ``SKILLS_DIR``
    prefix only.  Regression guard that both collectors now accept
    external-dir skills.
    """
    from unittest.mock import patch

    local_skills_dir = tmp_path / "local-skills"
    external_dir = tmp_path / "external-skills"

    # One skill in the local tree, one in the external tree; each lives
    # in its own category directory.
    local_md = local_skills_dir / "creative" / "local-skill" / "SKILL.md"
    external_md = external_dir / "mlops" / "external-skill" / "SKILL.md"
    local_md.parent.mkdir(parents=True)
    local_md.write_text("")
    external_md.parent.mkdir(parents=True)
    external_md.write_text("")

    fake_cmds = {
        "/local-skill": {
            "name": "local-skill",
            "description": "Local",
            "skill_md_path": str(local_md),
        },
        "/external-skill": {
            "name": "external-skill",
            "description": "External",
            "skill_md_path": str(external_md),
        },
    }

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    with patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds), \
            patch("tools.skills_tool.SKILLS_DIR", local_skills_dir), \
            patch(
                "agent.skill_utils.get_external_skills_dirs",
                return_value=[external_dir],
            ):
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # The local skill is grouped under "creative".
    assert "creative" in categories
    assert any(
        skill == "local-skill" for skill, _desc, _key in categories["creative"]
    )
    # The external skill is grouped under its own top-level dir, "mlops".
    assert "mlops" in categories, (
        "external-dir skills must be included — the old SKILLS_DIR-only "
        "prefix check was broken for by_category (completes #18741)"
    )
    assert any(
        skill == "external-skill" for skill, _desc, _key in categories["mlops"]
    )
    assert uncategorized == []
    assert hidden == 0
# ---------------------------------------------------------------------------
# Plugin slash command integration
@ -1354,6 +1674,19 @@ class TestPluginCommandEnumeration:
names = {name for name, _desc in telegram_bot_commands()}
assert "metricas" in names
def test_plugin_command_with_required_args_excluded_from_telegram_menu(self, monkeypatch):
    """Telegram BotCommand selections cannot supply required arguments."""
    # A plugin command whose args_hint marks a mandatory argument.
    plugin_commands = {
        "background-job": {
            "handler": lambda _a: "ok",
            "description": "Run a background job",
            "args_hint": "<prompt>",
            "plugin": "jobs-plugin",
        }
    }
    self._patch_plugin_commands(monkeypatch, plugin_commands)

    menu_names = set()
    for command_name, _description in telegram_bot_commands():
        menu_names.add(command_name)

    assert "background_job" not in menu_names
def test_plugin_command_appears_in_slack_subcommand_map(self, monkeypatch):
"""/hermes metricas must route through the Slack subcommand map."""
self._patch_plugin_commands(monkeypatch, {

View file

@ -81,6 +81,81 @@ class TestLoadConfigDefaults:
assert "max_turns" not in config
class TestLoadConfigParseFailure:
    """A YAML parse failure must NOT silently fall back to defaults.

    Before issue #23570 this was a single ``print(...)`` that scrolled past
    on the first invocation — users saw aux-fallback misbehavior with no clue
    their config.yaml was being ignored.  The helper must:

    * log at WARNING (so ``hermes logs`` surfaces it)
    * also write to stderr (so it's visible at startup even before
      ``setup_logging()`` has wired up file handlers)
    * dedup on (path, mtime_ns, size) so concurrent loads don't spam
    * re-warn after the user edits the file (different mtime)
    """

    def test_logs_and_warns_on_parse_failure(self, tmp_path, caplog, capsys):
        """A broken config falls back to defaults, logs a WARNING and hits stderr."""
        import logging

        # Reset the dedup cache so this test isn't affected by other tests
        # that may have warned about a different broken config.
        from hermes_cli import config as cfg_mod
        cfg_mod._CONFIG_PARSE_WARNED.clear()

        cfg_path = tmp_path / "config.yaml"
        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
            cfg_path.write_text("\tbroken tab indent:\n")
            with caplog.at_level(logging.WARNING, logger="hermes_cli.config"):
                config = load_config()

            # Falls back to defaults — confirms the silent-fallback we're warning about
            assert config["model"] == DEFAULT_CONFIG["model"]

            # WARNING-level log was emitted with file path + reason
            assert any(
                str(tmp_path / "config.yaml") in rec.message
                and "Falling back to default config" in rec.message
                for rec in caplog.records
            ), f"expected WARNING log, got: {[r.message for r in caplog.records]}"

            # stderr also got a user-visible message, so it stands out at
            # hermes startup before logging is configured
            captured = capsys.readouterr()
            assert "hermes config:" in captured.err
            assert str(tmp_path / "config.yaml") in captured.err

    def test_dedup_on_repeated_load_same_file(self, tmp_path, capsys):
        """Loading the same unchanged broken file twice warns only once."""
        from hermes_cli import config as cfg_mod
        cfg_mod._CONFIG_PARSE_WARNED.clear()

        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
            (tmp_path / "config.yaml").write_text("\tbroken:\n")

            load_config()
            first = capsys.readouterr().err
            assert "hermes config:" in first

            load_config()
            second = capsys.readouterr().err
            assert second == "", "second load should NOT re-warn (same file, same mtime)"

    def test_rewarns_after_file_edit(self, tmp_path, capsys):
        """Editing the broken file (new mtime and size) triggers a fresh warning."""
        from hermes_cli import config as cfg_mod
        cfg_mod._CONFIG_PARSE_WARNED.clear()

        cfg_path = tmp_path / "config.yaml"
        with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):
            cfg_path.write_text("\tbroken:\n")
            load_config()
            capsys.readouterr()  # discard first warning

            # Edit the file (still broken, but different content).  Push the
            # mtime forward explicitly instead of sleeping: a short sleep
            # does not guarantee a new timestamp on filesystems with coarse
            # mtime resolution, and it slows the suite for no benefit.
            before = cfg_path.stat()
            cfg_path.write_text("\tstill broken differently:\n")
            os.utime(
                cfg_path,
                ns=(before.st_atime_ns, before.st_mtime_ns + 10 * 10**9),
            )

            load_config()
            after_edit = capsys.readouterr().err
            assert "hermes config:" in after_edit, "edited file should re-warn"
class TestSaveAndLoadRoundtrip:
def test_roundtrip(self, tmp_path):
with patch.dict(os.environ, {"HERMES_HOME": str(tmp_path)}):

View file

@ -0,0 +1,269 @@
"""Tests for `hermes curator archive` and `hermes curator prune`.
Covers:
- archive refuses pinned skills with an `unpin` hint
- archive returns 0/1 based on archive_skill() success
- prune filters pinned and already-archived, applies --days threshold
- prune falls back to created_at when last_activity_at is null
- prune --dry-run makes no state changes
- prune --yes skips confirmation
- prune --days validation
"""
from __future__ import annotations
import io
from contextlib import redirect_stdout, redirect_stderr
from types import SimpleNamespace
from unittest.mock import patch
import pytest
def _ns(**kwargs):
return SimpleNamespace(**kwargs)
# ─── archive ────────────────────────────────────────────────────────────────
def test_archive_refuses_pinned(monkeypatch, capsys):
    """Archiving a pinned skill fails with rc 1 and points at ``unpin``."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    attempts = []

    def _unexpected_archive(name):
        # Must never run for a pinned skill; record the call if it does.
        attempts.append(name)
        return True, "should not get here"

    monkeypatch.setattr(skill_usage, "get_record", lambda name: {"pinned": True})
    monkeypatch.setattr(skill_usage, "archive_skill", _unexpected_archive)

    exit_code = curator_cli._cmd_archive(_ns(skill="pinned-skill"))

    assert exit_code == 1
    assert attempts == []
    printed = capsys.readouterr().out
    assert "pinned" in printed.lower()
    assert "hermes curator unpin" in printed
def test_archive_calls_archive_skill(monkeypatch, capsys):
    """An unpinned skill is archived; rc is 0 and the message is printed."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    def _archive_ok(name):
        return True, f"archived to .archive/{name}"

    monkeypatch.setattr(skill_usage, "get_record", lambda name: {"pinned": False})
    monkeypatch.setattr(skill_usage, "archive_skill", _archive_ok)

    exit_code = curator_cli._cmd_archive(_ns(skill="my-skill"))

    assert exit_code == 0
    printed = capsys.readouterr().out
    assert "archived to .archive/my-skill" in printed
def test_archive_reports_failure(monkeypatch, capsys):
    """A failed ``archive_skill`` yields rc 1 and surfaces the failure reason."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    def _archive_refused(name):
        return False, f"skill '{name}' is bundled or hub-installed; never archive"

    monkeypatch.setattr(skill_usage, "get_record", lambda name: {"pinned": False})
    monkeypatch.setattr(skill_usage, "archive_skill", _archive_refused)

    exit_code = curator_cli._cmd_archive(_ns(skill="hub-slug"))

    assert exit_code == 1
    printed = capsys.readouterr().out
    assert "bundled or hub-installed" in printed
# ─── prune ──────────────────────────────────────────────────────────────────
def _mk_record(name, *, idle_days=0, pinned=False, state="active", created_idle_days=None):
import datetime as _dt
now = _dt.datetime.now(_dt.timezone.utc)
last_activity = (now - _dt.timedelta(days=idle_days)).isoformat() if idle_days else None
created_delta = created_idle_days if created_idle_days is not None else idle_days
created = (now - _dt.timedelta(days=created_delta)).isoformat()
return {
"name": name,
"state": state,
"pinned": pinned,
"last_activity_at": last_activity,
"created_at": created,
"activity_count": 0 if idle_days == 0 and last_activity is None else 1,
}
def test_prune_days_validation(monkeypatch, capsys):
    """``--days 0`` is rejected with rc 2 and an explanatory stderr message."""
    import hermes_cli.curator as curator_cli

    bad_args = _ns(days=0, yes=True, dry_run=False)
    exit_code = curator_cli._cmd_prune(bad_args)

    assert exit_code == 2
    stderr_text = capsys.readouterr().err
    assert "--days must be >= 1" in stderr_text
def test_prune_nothing_to_do(monkeypatch, capsys):
    """With an empty report, prune exits 0 and says there is nothing to prune."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: [])

    exit_code = curator_cli._cmd_prune(_ns(days=30, yes=True, dry_run=False))

    assert exit_code == 0
    printed = capsys.readouterr().out
    assert "nothing to prune" in printed
def test_prune_filters_pinned_and_archived(monkeypatch, capsys):
    """Only old, active, unpinned skills are archived by prune."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    # Four candidates; only the last one should qualify at --days 30.
    report = [
        _mk_record("old-pinned", idle_days=200, pinned=True),
        _mk_record("old-archived", idle_days=200, state="archived"),
        _mk_record("recent", idle_days=10),
        _mk_record("old-active", idle_days=200),
    ]
    archived_names = []

    def _record_archive(name):
        archived_names.append(name)
        return True, f"archived {name}"

    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: report)
    monkeypatch.setattr(skill_usage, "archive_skill", _record_archive)

    exit_code = curator_cli._cmd_prune(_ns(days=30, yes=True, dry_run=False))

    assert exit_code == 0
    assert archived_names == ["old-active"]
    printed = capsys.readouterr().out
    assert "old-active" in printed
    assert "old-pinned" not in printed
    assert "old-archived" not in printed
    assert "recent" not in printed
    assert "archived 1/1" in printed
def test_prune_falls_back_to_created_at_when_never_used(monkeypatch, capsys):
    """Never-used skills must be prunable via created_at — otherwise immortal."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    never_used = _mk_record("never-used", idle_days=0, created_idle_days=200)
    # Make the "no activity ever" condition explicit.
    never_used["last_activity_at"] = None
    report = [never_used]

    archived_names = []

    def _record_archive(name):
        archived_names.append(name)
        return True, "ok"

    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: report)
    monkeypatch.setattr(skill_usage, "archive_skill", _record_archive)

    exit_code = curator_cli._cmd_prune(_ns(days=90, yes=True, dry_run=False))

    assert exit_code == 0
    assert archived_names == ["never-used"]
def test_prune_dry_run_makes_no_changes(monkeypatch, capsys):
    """``--dry-run`` lists the candidate but never calls ``archive_skill``."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    report = [_mk_record("old-skill", idle_days=200)]
    archived_names = []

    def _record_archive(name):
        archived_names.append(name)
        return True, "ok"

    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: report)
    monkeypatch.setattr(skill_usage, "archive_skill", _record_archive)

    exit_code = curator_cli._cmd_prune(_ns(days=30, yes=True, dry_run=True))

    assert exit_code == 0
    assert archived_names == []
    printed = capsys.readouterr().out
    assert "old-skill" in printed
    assert "dry run" in printed
def test_prune_prompts_without_yes(monkeypatch, capsys):
    """Without ``--yes`` prune asks first; answering "n" aborts with rc 1."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    report = [_mk_record("old-skill", idle_days=200)]
    archived_names = []

    def _record_archive(name):
        archived_names.append(name)
        return True, "ok"

    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: report)
    monkeypatch.setattr(skill_usage, "archive_skill", _record_archive)
    # Simulate the user declining at the confirmation prompt.
    monkeypatch.setattr("builtins.input", lambda _prompt: "n")

    exit_code = curator_cli._cmd_prune(_ns(days=30, yes=False, dry_run=False))

    assert exit_code == 1
    assert archived_names == []
    printed = capsys.readouterr().out
    assert "aborted" in printed
def test_prune_confirms_with_y(monkeypatch, capsys):
    """Answering "y" at the prompt lets prune archive the candidate."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    report = [_mk_record("old-skill", idle_days=200)]
    archived_names = []

    def _record_archive(name):
        archived_names.append(name)
        return True, "ok"

    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: report)
    monkeypatch.setattr(skill_usage, "archive_skill", _record_archive)
    # Simulate the user accepting at the confirmation prompt.
    monkeypatch.setattr("builtins.input", lambda _prompt: "y")

    exit_code = curator_cli._cmd_prune(_ns(days=30, yes=False, dry_run=False))

    assert exit_code == 0
    assert archived_names == ["old-skill"]
def test_prune_reports_partial_failure(monkeypatch, capsys):
    """One failing archive makes prune return 1 and report the failure."""
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    report = [
        _mk_record("ok-skill", idle_days=200),
        _mk_record("bad-skill", idle_days=200),
    ]

    def fake_archive(name):
        # Only "bad-skill" fails; everything else archives cleanly.
        return (False, "disk full") if name == "bad-skill" else (True, "ok")

    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: report)
    monkeypatch.setattr(skill_usage, "archive_skill", fake_archive)

    exit_code = curator_cli._cmd_prune(_ns(days=30, yes=True, dry_run=False))

    assert exit_code == 1
    printed = capsys.readouterr().out
    assert "archived 1/2" in printed
    assert "bad-skill: disk full" in printed
# ─── argparse wiring ────────────────────────────────────────────────────────
def test_archive_and_prune_registered():
    """``register_cli`` wires both subcommands to their handlers and flags."""
    import argparse
    import hermes_cli.curator as curator_cli

    parser = argparse.ArgumentParser(prog="hermes curator")
    curator_cli.register_cli(parser)

    archive_args = parser.parse_args(["archive", "my-skill"])
    assert archive_args.skill == "my-skill"
    assert archive_args.func.__name__ == "_cmd_archive"

    prune_argv = ["prune", "--days", "45", "--yes", "--dry-run"]
    prune_args = parser.parse_args(prune_argv)
    assert prune_args.days == 45
    assert prune_args.yes is True
    assert prune_args.dry_run is True
    assert prune_args.func.__name__ == "_cmd_prune"
def test_prune_defaults():
    """Bare ``prune`` defaults to 90 days, no ``--yes``, no ``--dry-run``."""
    import argparse
    import hermes_cli.curator as curator_cli

    parser = argparse.ArgumentParser(prog="hermes curator")
    curator_cli.register_cli(parser)

    parsed = parser.parse_args(["prune"])

    assert parsed.days == 90
    assert parsed.yes is False
    assert parsed.dry_run is False

View file

@ -0,0 +1,162 @@
"""Tests for `_print_curator_recent_run_notice`.
The notice prints the most recent curator run summary on `hermes update`,
exactly once per run. Show-once is enforced by stamping
`last_run_summary_shown_at` in curator state after printing.
Why this matters: the curator runs in the background (gateway tick + CLI
session start) so users normally never see the rename map. `hermes update`
is the high-attention surface where consolidations should land.
"""
from __future__ import annotations
import importlib
from datetime import datetime, timedelta, timezone
from pathlib import Path
import pytest
@pytest.fixture
def curator_env(tmp_path, monkeypatch, capsys):
    """Isolated HERMES_HOME with freshly reloaded curator and CLI modules.

    Yields a dict carrying the reloaded ``agent.curator`` module, the
    reloaded ``hermes_cli.main`` module, and the ``capsys`` fixture.
    """
    home = tmp_path / ".hermes"
    home.mkdir()
    for subdir in ("skills", "logs"):
        (home / subdir).mkdir()

    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.setattr(Path, "home", lambda: tmp_path)

    # Reload in this order so each module is re-imported after the
    # environment has been patched.
    import hermes_constants
    importlib.reload(hermes_constants)
    from agent import curator
    importlib.reload(curator)
    from hermes_cli import main as hermes_main
    importlib.reload(hermes_main)

    yield dict(curator=curator, main=hermes_main, capsys=capsys)
def _set_state(curator_mod, **fields):
state = curator_mod.load_state()
state.update(fields)
curator_mod.save_state(state)
def test_silent_when_no_curator_run_yet(curator_env):
    """First-run notice handles this case; recent-run notice stays silent."""
    print_notice = curator_env["main"]._print_curator_recent_run_notice
    print_notice()

    printed = curator_env["capsys"].readouterr().out
    assert "Skill curator — last run" not in printed
def test_silent_when_summary_is_single_line(curator_env):
    """No archives = no rename map = nothing to surface. But still stamps shown."""
    curator_mod = curator_env["curator"]
    run_stamp = datetime.now(timezone.utc).isoformat()
    _set_state(
        curator_mod,
        last_run_at=run_stamp,
        last_run_summary="auto: no changes; llm: no change",
    )

    curator_env["main"]._print_curator_recent_run_notice()

    printed = curator_env["capsys"].readouterr().out
    assert "Skill curator — last run" not in printed

    # The run is still marked as shown so it isn't reconsidered on every update.
    persisted = curator_mod.load_state()
    assert persisted["last_run_summary_shown_at"] == run_stamp
def test_prints_multiline_summary_with_rename_map(curator_env):
    """Multi-line summary (rename map appended) prints with timestamp + footer."""
    summary_lines = [
        "auto: 1 marked stale; llm: consolidated 2 into 1",
        "archived 2 skill(s):",
        " • pdf-extraction → document-tools",
        " • docx-extraction → document-tools",
        "full report: hermes curator status",
    ]
    _set_state(
        curator_env["curator"],
        last_run_at=datetime.now(timezone.utc).isoformat(),
        last_run_summary="\n".join(summary_lines),
    )

    curator_env["main"]._print_curator_recent_run_notice()

    printed = curator_env["capsys"].readouterr().out
    assert "Skill curator — last run" in printed
    assert "pdf-extraction → document-tools" in printed
    assert "docx-extraction → document-tools" in printed
    assert "shows once per curator run" in printed
def test_show_once_semantics(curator_env):
    """Calling twice prints once; second call is silent until a new run lands."""
    summary_lines = [
        "auto: no changes; llm: consolidated 1 into 1",
        "archived 1 skill(s):",
        " • old → new",
        "full report: hermes curator status",
    ]
    _set_state(
        curator_env["curator"],
        last_run_at=datetime.now(timezone.utc).isoformat(),
        last_run_summary="\n".join(summary_lines),
    )
    print_notice = curator_env["main"]._print_curator_recent_run_notice
    capture = curator_env["capsys"]

    print_notice()
    first = capture.readouterr().out
    assert "old → new" in first

    print_notice()
    second = capture.readouterr().out
    assert second == "", "second call must be silent (already shown)"
def test_new_run_resets_show_once(curator_env):
    """A newer curator run with rename data prints again, even though one was already shown."""
    curator_mod = curator_env["curator"]
    print_notice = curator_env["main"]._print_curator_recent_run_notice
    capture = curator_env["capsys"]

    def _summary_for(skill):
        return (
            "auto: no changes; llm: consolidated 1 into 1\n"
            "archived 1 skill(s):\n"
            f" • {skill} → umbrella\n"
            "full report: hermes curator status"
        )

    # An older run is recorded and its notice is shown (and drained).
    older = (datetime.now(timezone.utc) - timedelta(hours=8)).isoformat()
    _set_state(
        curator_mod,
        last_run_at=older,
        last_run_summary=_summary_for("thing-a"),
    )
    print_notice()
    capture.readouterr()  # drain

    # New run lands.
    newer = datetime.now(timezone.utc).isoformat()
    _set_state(
        curator_mod,
        last_run_at=newer,
        last_run_summary=_summary_for("thing-b"),
    )
    print_notice()

    printed = capture.readouterr().out
    assert "thing-b → umbrella" in printed
    assert "thing-a" not in printed  # only the newer run shows
def test_format_time_ago_buckets(curator_env):
    """Smoke test the time formatter — drives the `last run Xh ago` line."""
    fmt = curator_env["main"]._format_time_ago
    now = datetime.now(timezone.utc)

    # (age, expected label), checked in order from youngest to oldest.
    buckets = [
        (timedelta(seconds=10), "just now"),
        (timedelta(minutes=5), "5m ago"),
        (timedelta(hours=3), "3h ago"),
        (timedelta(days=2), "2d ago"),
    ]
    for age, expected in buckets:
        assert fmt((now - age).isoformat()) == expected

    # Unparseable input degrades to a generic label.
    assert fmt("not-a-real-iso-string") == "recently"

View file

@ -0,0 +1,87 @@
"""Tests for `hermes curator run` CLI behavior."""
from __future__ import annotations
from types import SimpleNamespace
def _args(**kwargs):
values = {
"dry_run": False,
"synchronous": False,
"background": False,
}
values.update(kwargs)
return SimpleNamespace(**values)
def test_run_defaults_to_synchronous(monkeypatch, capsys):
    """With no flags, ``curator run`` invokes the review synchronously."""
    import agent.curator as curator_state
    import hermes_cli.curator as curator_cli

    review_calls = []

    def _fake_review(**kwargs):
        review_calls.append(kwargs)
        return {"auto_transitions": {}}

    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
    monkeypatch.setattr(curator_state, "run_curator_review", _fake_review)

    assert curator_cli._cmd_run(_args()) == 0

    first_call = review_calls[0]
    assert first_call["synchronous"] is True
    assert first_call["dry_run"] is False
    assert "background" not in capsys.readouterr().out
def test_run_background_opts_into_async(monkeypatch, capsys):
    """``--background`` runs the review asynchronously and says so."""
    import agent.curator as curator_state
    import hermes_cli.curator as curator_cli

    review_calls = []

    def _fake_review(**kwargs):
        review_calls.append(kwargs)
        return {"auto_transitions": {}}

    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
    monkeypatch.setattr(curator_state, "run_curator_review", _fake_review)

    assert curator_cli._cmd_run(_args(background=True)) == 0

    assert review_calls[0]["synchronous"] is False
    printed = capsys.readouterr().out
    assert "llm pass running in background" in printed
def test_run_sync_wins_over_background(monkeypatch):
    """When both flags are set, the synchronous one takes precedence."""
    import agent.curator as curator_state
    import hermes_cli.curator as curator_cli

    review_calls = []

    def _fake_review(**kwargs):
        review_calls.append(kwargs)
        return {"auto_transitions": {}}

    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
    monkeypatch.setattr(curator_state, "run_curator_review", _fake_review)

    both_flags = _args(synchronous=True, background=True)
    assert curator_cli._cmd_run(both_flags) == 0
    assert review_calls[0]["synchronous"] is True
def test_dry_run_default_reports_synchronous_wording(monkeypatch, capsys):
    """A default dry run uses the synchronous wording, not the background one."""
    import agent.curator as curator_state
    import hermes_cli.curator as curator_cli

    def _fake_review(**kwargs):
        return {"auto_transitions": {}}

    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
    monkeypatch.setattr(curator_state, "run_curator_review", _fake_review)

    exit_code = curator_cli._cmd_run(_args(dry_run=True))
    assert exit_code == 0

    printed = capsys.readouterr().out
    assert "When the report lands" not in printed
    assert "Read the report with `hermes curator status`" in printed

View file

@ -114,6 +114,12 @@ def test_status_shows_most_and_least_used_sections(curator_status_env):
env["make_skill"]("top-dog")
env["make_skill"]("middling")
env["make_skill"]("never-used")
# Mark all three as agent-created so they enter the curator's catalog.
# Under the provenance-marker semantics, skills must be explicitly opted
# into curator management (normally via the background-review fork when
# it creates a skill through skill_manage).
for n in ("top-dog", "middling", "never-used"):
env["skill_usage"].mark_agent_created(n)
# Bump use_count differentially. All three counters (use/view/patch) feed
# into activity_count, so bumping use alone is enough to make activity
@ -150,7 +156,9 @@ def test_status_hides_most_active_when_all_zero(curator_status_env):
env = curator_status_env
env["make_skill"]("a")
env["make_skill"]("b")
# No bumps.
# Mark both as agent-created so the catalog lists them. No bumps.
env["skill_usage"].mark_agent_created("a")
env["skill_usage"].mark_agent_created("b")
out = _capture_status(env["curator_cli"])
@ -167,3 +175,28 @@ def test_status_no_skills_produces_clean_empty_output(curator_status_env):
# None of the ranking sections render
assert "most active" not in out
assert "least active" not in out
def test_status_marks_missing_last_report_path(monkeypatch, capsys, tmp_path):
    """``status`` flags a recorded report path that no longer exists on disk."""
    import agent.curator as curator_state
    import hermes_cli.curator as curator_cli
    import tools.skill_usage as skill_usage

    # Never created, so the path in state points at nothing.
    missing_report = tmp_path / "stale-report"
    fake_state = {
        "paused": False,
        "last_run_at": None,
        "last_run_summary": "auto: no changes",
        "run_count": 1,
        "last_report_path": str(missing_report),
    }

    monkeypatch.setattr(curator_state, "load_state", lambda: fake_state)
    monkeypatch.setattr(curator_state, "is_enabled", lambda: True)
    monkeypatch.setattr(curator_state, "get_interval_hours", lambda: 168)
    monkeypatch.setattr(curator_state, "get_stale_after_days", lambda: 30)
    monkeypatch.setattr(curator_state, "get_archive_after_days", lambda: 90)
    monkeypatch.setattr(skill_usage, "agent_created_report", lambda: [])

    exit_code = curator_cli._cmd_status(SimpleNamespace())
    assert exit_code == 0

    printed = capsys.readouterr().out
    assert f"last report: {missing_report} (missing)" in printed

View file

@ -56,7 +56,6 @@ class TestCustomProviderModelSwitch:
"sk-test",
"https://vllm.example.com/v1",
timeout=8.0,
api_mode=None,
)
def test_can_switch_to_different_model(self, config_home):
@ -141,12 +140,18 @@ class TestCustomProviderModelSwitch:
"api_mode": "anthropic_messages",
}
with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]), \
with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]) as mock_fetch, \
patch.dict("sys.modules", {"simple_term_menu": None}), \
patch("builtins.input", return_value="1"), \
patch("builtins.print"):
_model_flow_named_custom({}, provider_info)
mock_fetch.assert_called_once_with(
"***",
"https://proxy.example.com/anthropic",
timeout=8.0,
api_mode="anthropic_messages",
)
config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
model = config.get("model")
assert isinstance(model, dict)
@ -215,7 +220,6 @@ class TestCustomProviderModelSwitch:
"sk-live-example-provider",
"https://api.example-provider.test/v1",
timeout=8.0,
api_mode=None,
)
config = yaml.safe_load(config_path.read_text()) or {}
assert config["model"]["api_key"] == "${EXAMPLE_PROVIDER_API_KEY}"

View file

@ -273,6 +273,108 @@ class TestCaptureLogSnapshot:
assert "rotated agent data" in snap.full_text
# ---------------------------------------------------------------------------
# Capture log redaction (force=True applies regardless of HERMES_REDACT_SECRETS)
# ---------------------------------------------------------------------------
# A vendor-prefixed token used across the redaction tests: the fixtures write
# it into the log files and the assertions check whether it survives in the
# captured text. Long enough to clear the redactor's `floor` parameter so it
# actually masks rather than fully blanks.
_REDACT_FIXTURE_TOKEN = "sk-proj-A1B2C3D4E5F6G7H8I9J0aA"
class TestCaptureLogSnapshotRedaction:
    """Pin upload-time redaction at the _capture_log_snapshot boundary."""

    @pytest.fixture
    def hermes_home_with_secret(self, tmp_path, monkeypatch):
        """Isolated HERMES_HOME whose agent.log contains a vendor-prefixed token."""
        home = tmp_path / ".hermes"
        home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(home))
        # Baseline fixture: no explicit env-var opinion. With the post-#17691
        # default of ON, the default-path tests below exercise the
        # secure-default behaviour. The `force=True` regression test
        # setenvs to "false" inline to prove force=True works even when
        # the runtime flag is disabled.
        monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)

        logs_dir = home / "logs"
        logs_dir.mkdir()
        agent_line = (
            f"2026-04-12 17:00:00 INFO config: api_key={_REDACT_FIXTURE_TOKEN} loaded\n"
        )
        (logs_dir / "agent.log").write_text(agent_line)
        for empty_log in ("errors.log", "gateway.log"):
            (logs_dir / empty_log).write_text("")
        return home

    def test_default_redacts_tail_and_full_text(self, hermes_home_with_secret):
        """By default neither the tail nor the full text contains the token."""
        from hermes_cli.debug import _capture_log_snapshot

        snapshot = _capture_log_snapshot("agent", tail_lines=10)

        # Both views the upload uses must be sanitized.
        assert _REDACT_FIXTURE_TOKEN not in snapshot.tail_text
        assert snapshot.full_text is not None
        assert _REDACT_FIXTURE_TOKEN not in snapshot.full_text

    def test_redact_false_passes_through(self, hermes_home_with_secret):
        """``redact=False`` leaves the log content untouched."""
        from hermes_cli.debug import _capture_log_snapshot

        snapshot = _capture_log_snapshot("agent", tail_lines=10, redact=False)

        # Original token survives when the caller opts out.
        assert _REDACT_FIXTURE_TOKEN in snapshot.tail_text
        assert _REDACT_FIXTURE_TOKEN in (snapshot.full_text or "")

    def test_force_true_works_when_redaction_disabled(
        self, hermes_home_with_secret, monkeypatch
    ):
        """Regression test: redact_sensitive_text short-circuits without force=True.

        If a future refactor drops `force=True` from `_redact_log_text`, this
        test fails immediately. Without `force=True`, the redactor returns the
        input unchanged when HERMES_REDACT_SECRETS=false, and the share-time
        redaction feature ships silently broken for users who opted out of
        runtime redaction (e.g. developers working on the redactor itself).
        """
        import os

        # Force the runtime flag off so we're exercising the force=True path,
        # not the default-on path.
        monkeypatch.setenv("HERMES_REDACT_SECRETS", "false")

        from hermes_cli.debug import _capture_log_snapshot

        assert os.environ.get("HERMES_REDACT_SECRETS", "") == "false"
        snapshot = _capture_log_snapshot("agent", tail_lines=10)

        assert _REDACT_FIXTURE_TOKEN not in snapshot.tail_text
        assert snapshot.full_text is not None
        assert _REDACT_FIXTURE_TOKEN not in snapshot.full_text

    def test_capture_default_log_snapshots_threads_redact(
        self, hermes_home_with_secret
    ):
        """The multi-log helper redacts by default."""
        from hermes_cli.debug import _capture_default_log_snapshots

        snapshots = _capture_default_log_snapshots(50)
        agent_snapshot = snapshots["agent"]

        # Default threads redact=True to all three captured logs.
        assert _REDACT_FIXTURE_TOKEN not in agent_snapshot.tail_text
        assert _REDACT_FIXTURE_TOKEN not in (agent_snapshot.full_text or "")

    def test_capture_default_log_snapshots_no_redact_passes_through(
        self, hermes_home_with_secret
    ):
        """The multi-log helper honours ``redact=False``."""
        from hermes_cli.debug import _capture_default_log_snapshots

        snapshots = _capture_default_log_snapshots(50, redact=False)
        agent_snapshot = snapshots["agent"]

        assert _REDACT_FIXTURE_TOKEN in agent_snapshot.tail_text
        assert _REDACT_FIXTURE_TOKEN in (agent_snapshot.full_text or "")
# ---------------------------------------------------------------------------
# Debug report collection
# ---------------------------------------------------------------------------
@ -556,6 +658,124 @@ class TestRunDebugShare:
assert "all failed" in out.err
# ---------------------------------------------------------------------------
# Share-time redaction wiring + visible banner
# ---------------------------------------------------------------------------
class TestRunDebugShareRedaction:
"""End-to-end: --no-redact flag, banner injection, default behavior."""
@pytest.fixture
def hermes_home_with_secret(self, tmp_path, monkeypatch):
    """Isolated HERMES_HOME whose agent.log contains a vendor-prefixed token."""
    home = tmp_path / ".hermes"
    home.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(home))
    monkeypatch.delenv("HERMES_REDACT_SECRETS", raising=False)

    logs_dir = home / "logs"
    logs_dir.mkdir()

    # agent.log and gateway.log both carry the token; errors.log is empty.
    log_contents = {
        "agent.log": (
            f"2026-04-12 17:00:00 INFO config: api_key={_REDACT_FIXTURE_TOKEN} loaded\n"
        ),
        "errors.log": "",
        "gateway.log": (
            f"2026-04-12 17:00:01 INFO gateway.run: token {_REDACT_FIXTURE_TOKEN}\n"
        ),
    }
    for filename, text in log_contents.items():
        (logs_dir / filename).write_text(text)
    return home
def test_default_share_redacts_uploaded_content(
self, hermes_home_with_secret, capsys
):
"""The uploaded report and full-log pastes do not contain the raw token."""
from hermes_cli.debug import run_debug_share
args = MagicMock()
args.lines = 50
args.expire = 7
args.local = False
args.no_redact = False
captured: list[str] = []
def fake_upload(content, expiry_days=7):
captured.append(content)
return f"https://paste.rs/{len(captured)}"
with patch("hermes_cli.dump.run_dump"), \
patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \
patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload):
run_debug_share(args)
# At least the report plus one full log paste reached the upload path.
assert len(captured) >= 2
for content in captured:
assert _REDACT_FIXTURE_TOKEN not in content, (
"raw token leaked into upload-bound content"
)
def test_default_share_includes_redaction_banner(
self, hermes_home_with_secret, capsys
):
"""Each upload-bound paste carries the visible redaction banner."""
from hermes_cli.debug import run_debug_share
args = MagicMock()
args.lines = 50
args.expire = 7
args.local = False
args.no_redact = False
captured: list[str] = []
def fake_upload(content, expiry_days=7):
captured.append(content)
return f"https://paste.rs/{len(captured)}"
with patch("hermes_cli.dump.run_dump"), \
patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \
patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload):
run_debug_share(args)
for content in captured:
assert "redacted at upload time" in content, (
"redaction banner missing from upload-bound content"
)
def test_no_redact_flag_disables_redaction_and_banner(
self, hermes_home_with_secret, capsys
):
"""--no-redact preserves original log content and omits the banner."""
from hermes_cli.debug import run_debug_share
args = MagicMock()
args.lines = 50
args.expire = 7
args.local = False
args.no_redact = True
captured: list[str] = []
def fake_upload(content, expiry_days=7):
captured.append(content)
return f"https://paste.rs/{len(captured)}"
with patch("hermes_cli.dump.run_dump"), \
patch("hermes_cli.debug._sweep_expired_pastes", return_value=(0, 0)), \
patch("hermes_cli.debug.upload_to_pastebin", side_effect=fake_upload):
run_debug_share(args)
# The agent.log paste should now contain the raw token.
assert any(_REDACT_FIXTURE_TOKEN in c for c in captured), (
"expected raw token in --no-redact upload"
)
# No banner anywhere when redaction is disabled.
for content in captured:
assert "redacted at upload time" not in content, (
"banner present with --no-redact"
)
# ---------------------------------------------------------------------------
# run_debug router
# ---------------------------------------------------------------------------

View file

@ -0,0 +1,86 @@
"""Tests for the approvals.destructive_slash_confirm config gate.
Destructive session slash commands (/clear, /new, /reset, /undo) discard
conversation state. This config key (default True) gates a three-option
confirmation prompt; choosing "Always Approve" flips the key to False so
future destructive commands run silently.
See gateway/run.py::_maybe_confirm_destructive_slash and
cli.py::_confirm_destructive_slash for the runtime gate.
"""
from __future__ import annotations
from hermes_cli.config import DEFAULT_CONFIG
class TestDestructiveSlashConfirmDefault:
    """Pin the shipped default for ``approvals.destructive_slash_confirm``."""

    def test_default_config_has_the_key(self):
        section = DEFAULT_CONFIG.get("approvals")
        assert isinstance(section, dict)
        assert "destructive_slash_confirm" in section

    def test_default_is_true(self):
        # New installs confirm by default — destructive commands must not
        # silently wipe history without an explicit user "yes".
        flag = DEFAULT_CONFIG["approvals"]["destructive_slash_confirm"]
        assert flag is True

    def test_shape_matches_other_approval_keys(self):
        section = DEFAULT_CONFIG["approvals"]
        assert isinstance(section.get("destructive_slash_confirm"), bool)
        # Sibling key shape sanity — same flat dict level as mcp_reload_confirm.
        assert isinstance(section.get("mcp_reload_confirm"), bool)
class TestUserConfigMerge:
    """If a user has a pre-existing config without this key, load_config
    should fill it in from DEFAULT_CONFIG (deep merge preserves keys the
    user didn't override)."""

    @staticmethod
    def _load_with_approvals(tmp_path, monkeypatch, approvals):
        """Write ``approvals`` into a fresh HERMES_HOME and return load_config()."""
        import importlib

        import yaml

        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        config_file = hermes_home / "config.yaml"
        config_file.write_text(yaml.safe_dump({"approvals": approvals}))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

        import hermes_cli.config as cfg_mod

        # Reload after HERMES_HOME is set, then load through the fresh module.
        importlib.reload(cfg_mod)
        return cfg_mod.load_config()

    def test_existing_user_config_without_key_gets_default(self, tmp_path, monkeypatch):
        loaded = self._load_with_approvals(
            tmp_path,
            monkeypatch,
            {"mode": "manual", "timeout": 60, "cron_mode": "deny"},
        )
        assert loaded["approvals"]["destructive_slash_confirm"] is True

    def test_existing_user_config_with_false_key_survives_merge(
        self, tmp_path, monkeypatch,
    ):
        """A user who clicked "Always Approve" (key=false) must keep that
        setting; the default-true value must not win on later loads.
        """
        loaded = self._load_with_approvals(
            tmp_path,
            monkeypatch,
            {
                "mode": "manual",
                "timeout": 60,
                "cron_mode": "deny",
                "destructive_slash_confirm": False,
            },
        )
        assert loaded["approvals"]["destructive_slash_confirm"] is False

View file

@ -0,0 +1,246 @@
"""Tests for Discord /skill 32-char clamp collision warnings.
Discord's per-command name limit is 32 chars, so
``discord_skill_commands_by_category`` clamps skill slugs to that width
before deduping. When two skills share the same 32-char prefix, only
the first (alphabetical) wins; the second is dropped. Previously the
drop was silent: the ``hidden`` count incremented but nothing named
which skills collided, so authors had no way to discover the drop
short of noticing that their skill was missing from the autocomplete.
This module pins the upgraded behavior: a WARNING log with both full
cmd_keys + the clamped name, so whoever named the skills sees the
collision and can rename one.
"""
from __future__ import annotations
import logging
from pathlib import Path
from unittest.mock import patch
def test_clamp_collision_emits_warning_naming_both_skills(
    tmp_path: Path, caplog
) -> None:
    """Two skills with identical first 32 chars — warning names both."""
    from hermes_cli.commands import discord_skill_commands_by_category

    # Both slugs are the same 32-character prefix plus a distinct suffix,
    # so clamping to 32 characters maps them onto a single name.
    prefix = "skill-collision-prefix-identical"  # exactly 32 chars
    name_a, name_b = prefix + "-alpha", prefix + "-bravo"
    assert name_a[:32] == name_b[:32] == prefix

    skills_dir = tmp_path / "skills"
    fake_cmds = {}
    for skill_name, description in ((name_a, "Alpha"), (name_b, "Bravo")):
        skill_dir = skills_dir / "creative" / skill_name
        skill_dir.mkdir(parents=True)
        skill_md = skill_dir / "SKILL.md"
        skill_md.write_text("---\nname: x\n---\n")
        fake_cmds[f"/{skill_name}"] = {
            "name": skill_name,
            "description": description,
            "skill_md_path": str(skill_md),
        }

    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"), \
         patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds), \
         patch("tools.skills_tool.SKILLS_DIR", skills_dir):
        categories, _uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # One skill made it through, one was dropped (hidden counted).
    assert hidden == 1
    kept_names = [entry_name for entry_name, _d, _k in categories.get("creative", [])]
    assert len(kept_names) == 1
    # Alphabetical iteration means the -alpha variant wins the slot.
    assert kept_names[0] == prefix  # clamped

    # Exactly one warning, naming BOTH full cmd_keys and the clamped name.
    clamp_records = []
    for record in caplog.records:
        if record.levelno == logging.WARNING and "clamp" in record.getMessage():
            clamp_records.append(record)
    assert len(clamp_records) == 1, (
        f"expected exactly one clamp-collision warning, got {len(clamp_records)}: "
        f"{[r.getMessage() for r in clamp_records]}"
    )
    msg = clamp_records[0].getMessage()
    assert f"/{name_a}" in msg, f"winner not named in warning: {msg!r}"
    assert f"/{name_b}" in msg, f"loser not named in warning: {msg!r}"
    assert prefix in msg, f"clamped name not in warning: {msg!r}"
def test_clamp_collision_with_reserved_name_emits_distinct_warning(
    tmp_path: Path, caplog
) -> None:
    """A skill clashing with a reserved gateway command gets its own phrasing.

    The reserved-vs-skill case is operationally different: the fix is
    still "rename the skill," but there's no second skill to also
    rename. The warning should say so explicitly.
    """
    from hermes_cli.commands import discord_skill_commands_by_category

    # The skill slug is exactly the reserved name 'help' (4 chars), so the
    # clamped name is 'help' as well.
    reserved = "help"
    skills_dir = tmp_path / "skills"
    skill_dir = skills_dir / "creative" / reserved
    skill_dir.mkdir(parents=True)
    skill_md = skill_dir / "SKILL.md"
    skill_md.write_text("---\nname: x\n---\n")

    fake_cmds = {
        f"/{reserved}": {
            "name": reserved,
            "description": "desc",
            "skill_md_path": str(skill_md),
        },
    }

    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"), \
         patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds), \
         patch("tools.skills_tool.SKILLS_DIR", skills_dir):
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names={"help"},
        )

    # Skill dropped in favor of the reserved command.
    assert hidden == 1
    assert categories == {}
    assert uncategorized == []

    reserved_records = []
    for record in caplog.records:
        if record.levelno == logging.WARNING and "reserved" in record.getMessage():
            reserved_records.append(record)
    assert len(reserved_records) == 1, (
        f"expected one reserved-name collision warning, got "
        f"{[r.getMessage() for r in reserved_records]}"
    )
    msg = reserved_records[0].getMessage()
    assert f"/{reserved}" in msg
    assert "reserved" in msg.lower()
def test_no_collision_no_warning(tmp_path: Path, caplog) -> None:
    """Sanity: two distinct-prefix skills produce zero warnings."""
    from hermes_cli.commands import discord_skill_commands_by_category

    skills_dir = tmp_path / "skills"
    fake_cmds = {}
    for skill_name in ("alpha", "bravo"):
        skill_dir = skills_dir / "creative" / skill_name
        skill_dir.mkdir(parents=True)
        skill_md = skill_dir / "SKILL.md"
        skill_md.write_text("---\nname: x\n---\n")
        fake_cmds[f"/{skill_name}"] = {
            "name": skill_name,
            "description": "",
            "skill_md_path": str(skill_md),
        }

    with caplog.at_level(logging.WARNING, logger="hermes_cli.commands"), \
         patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds), \
         patch("tools.skills_tool.SKILLS_DIR", skills_dir):
        categories, _uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    assert hidden == 0
    assert {entry_name for entry_name, _d, _k in categories["creative"]} == {"alpha", "bravo"}

    # Neither collision phrasing may appear in any WARNING record.
    collision_records = []
    for record in caplog.records:
        if record.levelno != logging.WARNING:
            continue
        if "clamp" in record.getMessage() or "reserved" in record.getMessage():
            collision_records.append(record)
    assert collision_records == []
def test_long_skill_name_preserves_cmd_key_through_by_category(
    tmp_path: Path,
) -> None:
    """Skills with names > 32 chars must keep their original cmd_key.

    ``discord_skill_commands_by_category`` clamps the display name to 32
    chars but the third tuple element (cmd_key) must stay as the original
    ``/full-skill-name`` so that ``_skill_handler`` dispatches via
    ``_run_simple_slash`` with the full command, not the truncated one.

    This is the actual runtime path used by the Discord adapter via
    ``_refresh_skill_catalog_state``.
    """
    from hermes_cli.commands import discord_skill_commands_by_category

    skills_dir = tmp_path / "skills"
    skills_dir.mkdir()
    resolved = str(skills_dir.resolve())

    long_name = "generate-ascii-art-from-text-description-detailed"
    cmd_key = f"/{long_name}"

    def skill_entry(skill_name, description):
        # Paths are plain strings; no skill directory is created on disk here.
        return {
            "name": skill_name,
            "description": description,
            "skill_md_path": f"{resolved}/creative/{skill_name}/SKILL.md",
            "skill_dir": f"{resolved}/creative/{skill_name}",
        }

    fake_cmds = {
        cmd_key: skill_entry(long_name, "Generate ASCII art from a text description"),
        "/short-skill": skill_entry("short-skill", "A short skill"),
    }

    with patch("agent.skill_commands.get_skill_commands", return_value=fake_cmds), \
         patch("tools.skills_tool.SKILLS_DIR", skills_dir):
        categories, uncategorized, hidden = discord_skill_commands_by_category(
            reserved_names=set(),
        )

    # Flatten (same as _refresh_skill_catalog_state does)
    entries = list(uncategorized)
    for cat_skills in categories.values():
        entries.extend(cat_skills)

    # Build lookup (same as _refresh_skill_catalog_state does)
    skill_lookup = {}
    for entry_name, entry_desc, entry_key in entries:
        skill_lookup[entry_name] = (entry_desc, entry_key)

    # Find the long skill
    long_matches = [entry for entry in entries if entry[2] == cmd_key]
    assert len(long_matches) == 1, f"Long skill should appear once, got: {long_matches}"

    display_name, desc, key = long_matches[0]
    assert len(display_name) <= 32, (
        f"Display name should be clamped to 32 chars, got {len(display_name)}"
    )
    assert key == cmd_key, (
        f"cmd_key must be the original /{long_name}, got {key!r}"
    )

    # Verify lookup works: clamped display name -> original cmd_key
    assert display_name in skill_lookup
    _desc, looked_up_key = skill_lookup[display_name]
    assert looked_up_key == cmd_key, (
        f"Lookup must map clamped name to original cmd_key, got {looked_up_key!r}"
    )

    # Short skill should also be present and correct
    short_matches = [entry for entry in entries if entry[2] == "/short-skill"]
    assert len(short_matches) == 1
    assert short_matches[0][0] == "short-skill"

View file

@ -51,6 +51,57 @@ class TestProviderEnvDetection:
assert not _has_provider_env_config(content)
class TestDoctorEnvFileEncoding:
    """Regression for #18637 (bug 3): `hermes doctor` crashed on Windows
    Chinese locale (GBK) because `.env` was read with Path.read_text() which
    defaults to the system locale encoding, not UTF-8."""

    def test_doctor_reads_env_as_utf8_even_when_locale_is_not_utf8(
        self, monkeypatch, tmp_path
    ):
        import pathlib

        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()

        # Write a UTF-8 .env containing an em dash (U+2014 = e2 80 94). The
        # 0x94 byte is exactly the one the issue reporter hit: it's invalid
        # as a GBK trailing byte in this position, so locale-default reads
        # raise UnicodeDecodeError on Chinese Windows.
        env_path = hermes_home / ".env"
        env_path.write_text(
            "OPENAI_API_KEY=sk-test # em-dash here — should not crash\n",
            encoding="utf-8",
        )

        monkeypatch.setattr(doctor_mod, "HERMES_HOME", hermes_home)

        real_read_text = pathlib.Path.read_text

        def locale_bound_read_text(self, encoding=None, errors=None, **kwargs):
            # Simulate a GBK locale: every read passes through untouched
            # except a read of this specific .env that does not pin
            # encoding="utf-8", which is refused.
            if self != env_path or encoding == "utf-8":
                return real_read_text(self, encoding=encoding, errors=errors, **kwargs)
            raise UnicodeDecodeError(
                "gbk", b"\x94", 0, 1, "illegal multibyte sequence"
            )

        monkeypatch.setattr(pathlib.Path, "read_text", locale_bound_read_text)

        # Short-circuit the expensive tool-availability probe — we only
        # need doctor to reach the .env read without crashing.
        def _abort_probe(*a, **kw):
            raise SystemExit(0)

        fake_model_tools = types.SimpleNamespace(
            check_tool_availability=_abort_probe,
            TOOLSET_REQUIREMENTS={},
        )
        monkeypatch.setitem(sys.modules, "model_tools", fake_model_tools)

        # Run doctor. If the .env read still uses locale encoding, this
        # raises UnicodeDecodeError and the test fails.
        with pytest.raises(SystemExit):
            doctor_mod.run_doctor(Namespace(fix=False))
class TestDoctorToolAvailabilityOverrides:
def test_marks_honcho_available_when_configured(self, monkeypatch):
monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: True)
@ -75,6 +126,47 @@ class TestDoctorToolAvailabilityOverrides:
assert available == []
assert unavailable == [honcho_entry]
def test_marks_kanban_available_only_when_missing_worker_env_gate(self, monkeypatch):
    """Kanban moves to ``available`` when HERMES_KANBAN_TASK is unset."""
    monkeypatch.setattr(doctor, "_honcho_is_configured_for_doctor", lambda: False)
    monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)

    unavailable_in = [{"name": "kanban", "env_vars": [], "tools": ["kanban_show"]}]
    available, unavailable = doctor._apply_doctor_tool_availability_overrides(
        [], unavailable_in
    )

    assert available == ["kanban"]
    assert unavailable == []
def test_leaves_kanban_unavailable_when_worker_env_is_set(self, monkeypatch):
    """With HERMES_KANBAN_TASK set, the kanban entry stays unavailable."""
    monkeypatch.setenv("HERMES_KANBAN_TASK", "probe")
    kanban_entry = {"name": "kanban", "env_vars": [], "tools": ["kanban_show"]}

    result = doctor._apply_doctor_tool_availability_overrides([], [kanban_entry])
    available, unavailable = result

    assert available == []
    assert unavailable == [kanban_entry]
def test_leaves_non_worker_kanban_failure_unavailable(self, monkeypatch):
    """A kanban entry listing an extra, non-kanban tool is not overridden."""
    monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)
    kanban_entry = {
        "name": "kanban",
        "env_vars": [],
        "tools": ["kanban_show", "not_a_kanban_tool"],
    }

    result = doctor._apply_doctor_tool_availability_overrides([], [kanban_entry])
    available, unavailable = result

    assert available == []
    assert unavailable == [kanban_entry]
def test_kanban_doctor_detail_explains_worker_gate(self, monkeypatch):
    """The kanban detail string describes the dispatcher-worker gate."""
    monkeypatch.delenv("HERMES_KANBAN_TASK", raising=False)

    detail = doctor._doctor_tool_availability_detail("kanban")

    expected = "(runtime-gated; loaded only for dispatcher-spawned workers)"
    assert detail == expected
class TestHonchoDoctorConfigDetection:
def test_reports_configured_when_enabled_with_api_key(self, monkeypatch):
@ -286,6 +378,11 @@ def test_run_doctor_termux_treats_docker_and_browser_warnings_as_expected(monkey
assert "1) pkg install nodejs" in out
assert "2) npm install -g agent-browser" in out
assert "3) agent-browser install" in out
assert "Termux compatibility fallbacks:" in out
assert "use .[termux-all] for broad compatibility" in out
assert "Matrix E2EE extra is excluded on Termux" in out
assert "Local faster-whisper extra is excluded on Termux" in out
assert "STT fallback: use Groq Whisper (set GROQ_API_KEY) or OpenAI Whisper (set VOICE_TOOLS_OPENAI_KEY)." in out
assert "docker not found (optional)" not in out
@ -430,6 +527,46 @@ def test_run_doctor_accepts_hermes_provider_ids_that_catalog_aliases(
)
def test_run_doctor_accepts_kimi_coding_cn_provider(monkeypatch, tmp_path):
    """Doctor output must not flag ``kimi-coding-cn`` as an unrecognised provider."""
    hermes_home = tmp_path / ".hermes"
    project_root = tmp_path / "project"

    hermes_home.mkdir(parents=True, exist_ok=True)
    (hermes_home / ".env").write_text("KIMI_CN_API_KEY=***\n", encoding="utf-8")
    config_text = (
        "model:\n"
        " provider: kimi-coding-cn\n"
        " default: kimi-k2.6\n"
    )
    (hermes_home / "config.yaml").write_text(config_text, encoding="utf-8")

    # Point the doctor module's path globals at the sandbox.
    for attr, value in (
        ("HERMES_HOME", hermes_home),
        ("PROJECT_ROOT", project_root),
        ("_DHH", str(hermes_home)),
    ):
        monkeypatch.setattr(doctor_mod, attr, value)
    project_root.mkdir(exist_ok=True)

    # Stand-in for ``model_tools`` that reports no tools either way.
    stub_model_tools = types.SimpleNamespace(
        check_tool_availability=lambda *a, **kw: ([], []),
        TOOLSET_REQUIREMENTS={},
    )
    monkeypatch.setitem(sys.modules, "model_tools", stub_model_tools)

    # Best-effort: stub the auth status helpers; any failure here is ignored.
    try:
        from hermes_cli import auth as _auth_mod

        monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
        monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
        monkeypatch.setattr(_auth_mod, "get_auth_status", lambda provider: {"logged_in": True})
    except Exception:
        pass

    stdout_buffer = io.StringIO()
    with contextlib.redirect_stdout(stdout_buffer):
        doctor_mod.run_doctor(Namespace(fix=False))
    out = stdout_buffer.getvalue()

    assert "model.provider 'kimi-coding-cn' is not a recognised provider" not in out
def test_run_doctor_termux_does_not_mark_browser_available_without_agent_browser(monkeypatch, tmp_path):
home = tmp_path / ".hermes"
home.mkdir(parents=True, exist_ok=True)
@ -520,6 +657,60 @@ def test_run_doctor_kimi_cn_env_is_detected_and_probe_is_null_safe(monkeypatch,
assert any(url == "https://api.moonshot.cn/v1/models" for url, _, _ in calls)
def test_run_doctor_dashscope_retries_china_endpoint_after_intl_unauthorized(monkeypatch, tmp_path):
    """After a 401 from the intl DashScope host, doctor also probes the China host."""
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir(parents=True, exist_ok=True)
    (hermes_home / "config.yaml").write_text("memory: {}\n", encoding="utf-8")
    (hermes_home / ".env").write_text("DASHSCOPE_API_KEY=sk-test\n", encoding="utf-8")
    project_root = tmp_path / "project"
    project_root.mkdir(exist_ok=True)

    # Point the doctor module's path globals at the sandbox.
    for attr, value in (
        ("HERMES_HOME", hermes_home),
        ("PROJECT_ROOT", project_root),
        ("_DHH", str(hermes_home)),
    ):
        monkeypatch.setattr(doctor_mod, attr, value)
    monkeypatch.setenv("DASHSCOPE_API_KEY", "sk-test")
    monkeypatch.delenv("DASHSCOPE_BASE_URL", raising=False)

    # Stand-in for ``model_tools`` that reports no tools either way.
    stub_model_tools = types.SimpleNamespace(
        check_tool_availability=lambda *a, **kw: ([], []),
        TOOLSET_REQUIREMENTS={},
    )
    monkeypatch.setitem(sys.modules, "model_tools", stub_model_tools)

    try:
        from hermes_cli import auth as _auth_mod

        monkeypatch.setattr(_auth_mod, "get_nous_auth_status", lambda: {})
        monkeypatch.setattr(_auth_mod, "get_codex_auth_status", lambda: {})
    except ImportError:
        pass

    calls = []

    def fake_get(url, headers=None, timeout=None):
        calls.append((url, headers, timeout))
        # Only URLs containing the China host substring get 200; the intl
        # host ("dashscope-intl...") does not contain it and gets 401.
        if "dashscope.aliyuncs.com" in url:
            return types.SimpleNamespace(status_code=200)
        return types.SimpleNamespace(status_code=401)

    import httpx

    monkeypatch.setattr(httpx, "get", fake_get)

    stdout_buffer = io.StringIO()
    with contextlib.redirect_stdout(stdout_buffer):
        doctor_mod.run_doctor(Namespace(fix=False))
    out = stdout_buffer.getvalue()

    assert "Alibaba/DashScope" in out
    assert "invalid API key" not in out
    requested_urls = [url for url, _, _ in calls]
    assert "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models" in requested_urls
    assert "https://dashscope.aliyuncs.com/compatible-mode/v1/models" in requested_urls
@pytest.mark.parametrize("base_url", [None, "https://opencode.ai/zen/go/v1"])
def test_run_doctor_opencode_go_skips_invalid_models_probe(monkeypatch, tmp_path, base_url):
home = tmp_path / ".hermes"
@ -572,3 +763,79 @@ def test_run_doctor_opencode_go_skips_invalid_models_probe(monkeypatch, tmp_path
)
assert not any(url == "https://opencode.ai/zen/go/v1/models" for url, _, _ in calls)
assert not any("opencode" in url.lower() and "models" in url.lower() for url, _, _ in calls)
class TestGitHubTokenCheck:
    """Tests for GitHub token / gh auth detection in doctor."""

    @staticmethod
    def _doctor_output():
        """Run ``run_doctor`` with ``fix=False`` and return its captured stdout."""
        import contextlib
        import io

        from hermes_cli.doctor import run_doctor

        buf = io.StringIO()
        with contextlib.redirect_stdout(buf):
            run_doctor(Namespace(fix=False))
        return buf.getvalue()

    def test_no_token_and_not_gh_authenticated_shows_warn(self, monkeypatch, tmp_path):
        """With no token in the environment and no gh on PATH, doctor warns."""
        home = tmp_path / ".hermes"
        home.mkdir(parents=True, exist_ok=True)
        monkeypatch.setenv("HERMES_HOME", str(home))
        # Clear any ambient token (CI runners commonly export GITHUB_TOKEN);
        # otherwise this "no token" scenario depends on the host environment.
        monkeypatch.delenv("GITHUB_TOKEN", raising=False)
        monkeypatch.delenv("GH_TOKEN", raising=False)
        monkeypatch.setenv("PATH", "/nonexistent")  # gh not found

        out = self._doctor_output()

        assert "No GITHUB_TOKEN" in out
        assert "60 req/hr" in out

    def test_token_env_present_shows_ok(self, monkeypatch, tmp_path):
        """A GITHUB_TOKEN in the environment is reported as configured."""
        home = tmp_path / ".hermes"
        home.mkdir(parents=True, exist_ok=True)
        monkeypatch.setenv("HERMES_HOME", str(home))
        monkeypatch.setenv("GITHUB_TOKEN", "ghp_test123")
        monkeypatch.setenv("PATH", "/nonexistent")  # gh not found

        out = self._doctor_output()

        assert "GitHub token configured" in out

    def test_gh_authenticated_without_env_token_shows_ok(self, monkeypatch, tmp_path):
        """Without env tokens, a successful stubbed ``gh auth`` call counts as OK."""
        home = tmp_path / ".hermes"
        home.mkdir(parents=True, exist_ok=True)
        monkeypatch.setenv("HERMES_HOME", str(home))
        # No GITHUB_TOKEN or GH_TOKEN
        monkeypatch.delenv("GITHUB_TOKEN", raising=False)
        monkeypatch.delenv("GH_TOKEN", raising=False)

        # Mock gh to return success
        import shutil

        real_which = shutil.which

        def mock_which(cmd, *args, **kwargs):
            # Accept and forward the optional ``mode``/``path`` arguments so
            # the stub stays call-compatible with ``shutil.which``.
            if cmd == "gh":
                return "/usr/local/bin/gh"
            return real_which(cmd, *args, **kwargs)

        monkeypatch.setattr(shutil, "which", mock_which)

        call_log = []

        def mock_run(cmd, **kwargs):
            call_log.append(cmd)
            # Only ``gh auth ...`` succeeds; every other command fails.
            returncode = 0 if cmd[:2] == ["gh", "auth"] else 1
            return types.SimpleNamespace(returncode=returncode, stdout="", stderr="")

        import subprocess

        monkeypatch.setattr(subprocess, "run", mock_run)

        out = self._doctor_output()

        assert "gh auth" in str(call_log) or any(c[0] == "gh" for c in call_log), f"gh not called: {call_log}"
        assert "GitHub authenticated via gh CLI" in out or "token configured" in out

View file

@ -0,0 +1,50 @@
"""Regression: hermes doctor must not run a generic Bearer-auth health
check for providers that already have a dedicated check (Anthropic,
OpenRouter, Bedrock).
Anthropic's native API requires `x-api-key` + `anthropic-version` headers;
the generic loop sends `Authorization: Bearer ...` which Anthropic answers
with HTTP 404. The dedicated check at hermes_cli/doctor.py already covers
Anthropic with the right headers, so the pluggable profile must be
skipped by `_build_apikey_providers_list()`.
See: NousResearch/hermes-agent#22346
"""
from __future__ import annotations
def test_build_apikey_providers_list_skips_dedicated_check_providers():
    """Anthropic, OpenRouter and Bedrock must be absent from the generic list."""
    from hermes_cli import doctor

    # Force a rebuild — the module caches the list on first call.
    doctor._APIKEY_PROVIDERS_CACHE = None
    entries = doctor._build_apikey_providers_list()

    # Tuple shape: (display_name, env_vars, default_url, base_env, supports_health_check)
    names = {entry[0].lower() for entry in entries}

    # (substring that must not occur in any name, failure-message prefix)
    forbidden = (
        (
            "anthropic",
            "Anthropic provider profile leaked into generic Bearer-auth health "
            "check loop. Dedicated check above already covers it with "
            "x-api-key headers. Got entries: ",
        ),
        (
            "openrouter",
            "OpenRouter has a dedicated check; generic loop must skip it. "
            "Got: ",
        ),
        (
            "bedrock",
            "Bedrock uses AWS SDK creds, not Bearer auth; generic loop must skip. "
            "Got: ",
        ),
    )
    for needle, message_prefix in forbidden:
        leaked = any(needle in name for name in names)
        assert not leaked, f"{message_prefix}{sorted(names)}"
def test_build_apikey_providers_list_includes_non_dedicated_providers():
    """Sanity guard: the skip-set must not strip every provider."""
    from hermes_cli import doctor

    # Reset the module-level cache so the list is rebuilt for this test.
    doctor._APIKEY_PROVIDERS_CACHE = None
    display_names = set()
    for entry in doctor._build_apikey_providers_list():
        display_names.add(entry[0])

    assert "DeepSeek" in display_names
    assert "Z.AI / GLM" in display_names

View file

@ -37,7 +37,7 @@ def test_project_env_is_sanitized_before_loading(tmp_path, monkeypatch):
home = tmp_path / "hermes"
project_env = tmp_path / ".env"
project_env.write_text(
"TELEGRAM_BOT_TOKEN=8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
"TELEGRAM_BOT_TOKEN=0123456789:test"
"ANTHROPIC_API_KEY=sk-ant-test123\n",
encoding="utf-8",
)
@ -48,7 +48,7 @@ def test_project_env_is_sanitized_before_loading(tmp_path, monkeypatch):
loaded = load_hermes_dotenv(hermes_home=home, project_env=project_env)
assert loaded == [project_env]
assert os.getenv("TELEGRAM_BOT_TOKEN") == "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
assert os.getenv("TELEGRAM_BOT_TOKEN") == "0123456789:test"
assert os.getenv("ANTHROPIC_API_KEY") == "sk-ant-test123"

View file

@ -14,7 +14,7 @@ def test_load_env_sanitizes_concatenated_lines():
"""
from hermes_cli.config import load_env
token = "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
token = "0123456789:test"
# Simulate concatenated line: TOKEN=xxx followed immediately by another key
corrupted = f"TELEGRAM_BOT_TOKEN={token}ANTHROPIC_API_KEY=sk-ant-test123\n"
@ -67,7 +67,7 @@ def test_env_loader_sanitizes_before_dotenv():
"""Verify env_loader._sanitize_env_file_if_needed fixes corrupted files."""
from hermes_cli.env_loader import _sanitize_env_file_if_needed
token = "8356550917:AAGGEkzg06Hrc3Hjb3Sa1jkGVDOdU_lYy2Q"
token = "0123456789:test"
corrupted = f"TELEGRAM_BOT_TOKEN={token}ANTHROPIC_API_KEY=sk-ant-test\n"
with tempfile.NamedTemporaryFile(

View file

@ -13,6 +13,21 @@ def _install_fake_gateway_run(monkeypatch, start_gateway):
module = ModuleType("gateway.run")
module.start_gateway = start_gateway
monkeypatch.setitem(sys.modules, "gateway.run", module)
# ``run_gateway()`` calls ``refresh_systemd_unit_if_needed()`` on every
# invocation so that restart settings stay current after exit-code-75
# respawns. That helper writes to ``Path.home() / ".config/systemd/user
# /hermes-gateway.service"`` and runs ``systemctl --user daemon-reload``
# — both target the *real* user environment because the conftest only
# sandboxes ``HERMES_HOME``, not ``HOME``. Tests that drive
# ``run_gateway()`` end-to-end with a fake ``start_gateway`` MUST stub
# the refresh call too, or every run rewrites the developer's installed
# unit (baking in the test's pytest-tmp ``HERMES_HOME`` value, which
# systemd then uses on the next boot — silently breaking the gateway
# for the developer).
monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
monkeypatch.setattr(
gateway, "refresh_systemd_unit_if_needed", lambda system=False: False
)
def test_run_gateway_exits_cleanly_on_keyboard_interrupt(monkeypatch, capsys):
@ -53,6 +68,103 @@ def test_run_gateway_exits_nonzero_when_start_gateway_reports_failure(monkeypatc
assert calls == [(True, None)]
def test_run_gateway_refuses_root_in_official_docker(monkeypatch, tmp_path, capsys):
    """As euid 0 in an official Docker checkout, run_gateway exits with code 1."""
    project_root = tmp_path / "opt" / "hermes"
    docker_dir = project_root / "docker"
    docker_dir.mkdir(parents=True)
    (docker_dir / "entrypoint.sh").write_text("#!/bin/sh\n")

    monkeypatch.setattr(gateway, "PROJECT_ROOT", project_root)
    # Pretend to be root, with no escape-hatch variable set.
    monkeypatch.setattr(gateway.os, "geteuid", lambda: 0)
    monkeypatch.delenv("HERMES_ALLOW_ROOT_GATEWAY", raising=False)
    monkeypatch.setattr(gateway, "_is_official_docker_checkout", lambda: True)

    with pytest.raises(SystemExit) as exc_info:
        gateway.run_gateway()

    assert exc_info.value.code == 1
    printed = capsys.readouterr().out
    assert "Refusing to run the Hermes gateway as root" in printed
    assert "/opt/hermes/docker/entrypoint.sh" in printed
def test_run_gateway_root_guard_has_escape_hatch(monkeypatch):
    """HERMES_ALLOW_ROOT_GATEWAY=1 lets run_gateway reach start_gateway as root."""
    start_calls = []

    def recording_start_gateway(*, replace, verbosity):
        start_calls.append((replace, verbosity))
        return object()

    _install_fake_gateway_run(monkeypatch, recording_start_gateway)
    monkeypatch.setattr(gateway.asyncio, "run", lambda coro: True)
    # Same root + official-Docker conditions as the refusal test, but with
    # the escape-hatch variable set.
    monkeypatch.setattr(gateway.os, "geteuid", lambda: 0)
    monkeypatch.setattr(gateway, "_is_official_docker_checkout", lambda: True)
    monkeypatch.setenv("HERMES_ALLOW_ROOT_GATEWAY", "1")

    gateway.run_gateway(verbose=2, replace=True)

    assert start_calls == [(True, 2)]
def test_run_gateway_windows_foreground_keeps_ctrl_c_enabled(monkeypatch):
    """Foreground Windows run (no detached flag) must not ignore SIGINT."""
    start_calls = []
    installed_handlers = []

    def recording_start_gateway(*, replace, verbosity):
        start_calls.append((replace, verbosity))
        return object()

    class _InteractiveStdin:
        def isatty(self):
            return True

    def recording_signal(sig, handler):
        installed_handlers.append((sig, handler))

    _install_fake_gateway_run(monkeypatch, recording_start_gateway)
    monkeypatch.setattr(gateway, "is_windows", lambda: True)
    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
    monkeypatch.setattr(gateway.sys, "stdin", _InteractiveStdin())
    monkeypatch.delenv("HERMES_GATEWAY_DETACHED", raising=False)
    monkeypatch.setattr(gateway.signal, "signal", recording_signal)
    monkeypatch.setattr(gateway.asyncio, "run", lambda coro: True)

    gateway.run_gateway()

    assert start_calls == [(False, 0)]
    ignore_sigint = (gateway.signal.SIGINT, gateway.signal.SIG_IGN)
    assert ignore_sigint not in installed_handlers
def test_run_gateway_windows_detached_absorbs_console_controls(monkeypatch):
    """With HERMES_GATEWAY_DETACHED=1 on Windows, SIGINT is set to SIG_IGN."""
    start_calls = []
    installed_handlers = []

    def recording_start_gateway(*, replace, verbosity):
        start_calls.append((replace, verbosity))
        return object()

    class _InteractiveStdin:
        def isatty(self):
            return True

    def recording_signal(sig, handler):
        installed_handlers.append((sig, handler))

    _install_fake_gateway_run(monkeypatch, recording_start_gateway)
    monkeypatch.setattr(gateway, "is_windows", lambda: True)
    monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
    monkeypatch.setattr(gateway.sys, "stdin", _InteractiveStdin())
    monkeypatch.setenv("HERMES_GATEWAY_DETACHED", "1")
    monkeypatch.setattr(gateway.signal, "signal", recording_signal)
    monkeypatch.setattr(gateway.asyncio, "run", lambda coro: True)

    gateway.run_gateway()

    assert start_calls == [(False, 0)]
    ignore_sigint = (gateway.signal.SIGINT, gateway.signal.SIG_IGN)
    assert ignore_sigint in installed_handlers
class TestSystemdLingerStatus:
def test_reports_enabled(self, monkeypatch):
monkeypatch.setattr(gateway, "is_linux", lambda: True)
@ -307,9 +419,22 @@ def test_find_gateway_pids_falls_back_to_pid_file_when_process_scan_fails(monkey
monkeypatch.setattr(gateway, "is_windows", lambda: False)
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 321)
# /proc walk is the first path tried (#22693). Force os.listdir on /proc
# to raise so the function falls back to ps, where fake_run takes over.
_real_listdir = gateway.os.listdir
def _no_proc_listdir(path):
if path == "/proc":
raise OSError("test stub: /proc unavailable")
return _real_listdir(path)
monkeypatch.setattr(gateway.os, "listdir", _no_proc_listdir)
def fake_run(cmd, **kwargs):
if cmd[:4] == ["ps", "-A", "eww", "-o"]:
return SimpleNamespace(returncode=1, stdout="", stderr="ps failed")
if cmd[:3] == ["ps", "-o", "ppid="]:
# _get_ancestor_pids() walks up the tree; return "no parent" so
# the loop terminates cleanly.
return SimpleNamespace(returncode=1, stdout="", stderr="")
raise AssertionError(f"Unexpected command: {cmd}")
monkeypatch.setattr(gateway.subprocess, "run", fake_run)
@ -409,14 +534,21 @@ class TestWaitForGatewayExit:
class TestStopProfileGateway:
def test_stop_profile_gateway_keeps_pid_file_when_process_still_running(self, monkeypatch):
calls = {"kill": 0, "remove": 0}
calls = {"kill": 0, "alive_probes": 0, "remove": 0}
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 12345)
# Post-#21561: the stop loop sends one SIGTERM via ``os.kill`` then
# polls liveness via ``gateway.status._pid_exists`` (safe on
# Windows — bpo-14484). Instrument both seams separately.
monkeypatch.setattr(
gateway.os,
"kill",
lambda pid, sig: calls.__setitem__("kill", calls["kill"] + 1),
)
monkeypatch.setattr(
"gateway.status._pid_exists",
lambda pid: calls.__setitem__("alive_probes", calls["alive_probes"] + 1) or True,
)
monkeypatch.setattr("time.sleep", lambda _: None)
monkeypatch.setattr(
"gateway.status.remove_pid_file",
@ -424,5 +556,6 @@ class TestStopProfileGateway:
)
assert gateway.stop_profile_gateway() is True
assert calls["kill"] == 21
assert calls["kill"] == 1 # one SIGTERM
assert calls["alive_probes"] == 20 # 20 liveness polls over the 2s window
assert calls["remove"] == 0

View file

@ -0,0 +1,138 @@
"""Tests for /proc-based gateway PID detection in Docker environments.
Verifies that _scan_gateway_pids() uses /proc/*/cmdline when available
(Docker without procps) and falls back to ps only when /proc is absent.
See: NousResearch/hermes-agent#7622
"""
import os
from unittest.mock import MagicMock, patch
import hermes_cli.gateway as gateway_mod
# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------
_GATEWAY_CMD = "python -m hermes_cli.main gateway run"
_OTHER_CMD = "python -m some_other_thing"
def _fake_proc_dir(entries: dict):
"""Return side_effects that simulate /proc: isdir → True, listdir → pids,
open(cmdline) null-delimited command bytes."""
def _isdir(path):
return str(path) == "/proc"
def _listdir(path):
if str(path) == "/proc":
return [str(pid) for pid in entries] + ["self", "version"]
raise FileNotFoundError(path)
def _open(path, mode="r", **kwargs):
path_str = str(path)
if "/cmdline" in path_str:
pid = int(path_str.split("/proc/")[1].split("/")[0])
raw = entries.get(pid, "").encode("utf-8").replace(b" ", b"\x00")
m = MagicMock()
m.read.return_value = raw
m.__enter__ = lambda s: s
m.__exit__ = MagicMock(return_value=False)
return m
raise FileNotFoundError(path)
return _isdir, _listdir, _open
# ---------------------------------------------------------------------------
# Tests
# ---------------------------------------------------------------------------
class TestProcFallback:
    """``_scan_gateway_pids`` reads ``/proc`` when it exists and skips ``ps``."""

    def test_detects_gateway_pid_via_proc(self):
        """A gateway command line under the fake /proc is reported; ps is never run."""
        own_pid = os.getpid()
        fake_isdir, fake_listdir, fake_open = _fake_proc_dir(
            {
                own_pid: "python -m hermes_cli.main",  # own process — excluded
                12345: _GATEWAY_CMD,
                99999: _OTHER_CMD,
            }
        )

        with (
            patch("hermes_cli.gateway.is_windows", return_value=False),
            patch("os.path.isdir", side_effect=fake_isdir),
            patch("os.listdir", side_effect=fake_listdir),
            patch("builtins.open", side_effect=fake_open),
            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
            patch("subprocess.run") as ps_spy,
        ):
            found = gateway_mod._scan_gateway_pids(set(), all_profiles=True)

        assert 12345 in found
        assert 99999 not in found
        ps_spy.assert_not_called()  # ps must NOT be called when /proc worked

    def test_excludes_own_pid_from_proc_scan(self):
        """The scanning process itself is never returned, even if it looks like a gateway."""
        own_pid = os.getpid()
        fake_isdir, fake_listdir, fake_open = _fake_proc_dir({own_pid: _GATEWAY_CMD})

        with (
            patch("hermes_cli.gateway.is_windows", return_value=False),
            patch("os.path.isdir", side_effect=fake_isdir),
            patch("os.listdir", side_effect=fake_listdir),
            patch("builtins.open", side_effect=fake_open),
            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
            patch("subprocess.run"),
        ):
            found = gateway_mod._scan_gateway_pids(set(), all_profiles=True)

        assert own_pid not in found

    def test_falls_back_to_ps_when_proc_absent(self):
        """With no /proc directory the scan runs ps exactly once and parses its output."""
        ps_result = MagicMock()
        ps_result.returncode = 0
        ps_result.stdout = f"12345 {_GATEWAY_CMD}\n99999 {_OTHER_CMD}\n"

        with (
            patch("hermes_cli.gateway.is_windows", return_value=False),
            patch("os.path.isdir", return_value=False),
            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
            patch("subprocess.run", return_value=ps_result) as ps_spy,
        ):
            found = gateway_mod._scan_gateway_pids(set(), all_profiles=True)

        ps_spy.assert_called_once()
        assert 12345 in found

    def test_proc_permission_error_skips_pid(self):
        """An unreadable cmdline is skipped without crashing and without running ps."""

        def fake_isdir(path):
            return str(path) == "/proc"

        def fake_listdir(path):
            if str(path) != "/proc":
                raise FileNotFoundError
            return ["12345", "self"]

        def denied_open(path, mode="r", **kwargs):
            raise PermissionError("no access")

        with (
            patch("hermes_cli.gateway.is_windows", return_value=False),
            patch("os.path.isdir", side_effect=fake_isdir),
            patch("os.listdir", side_effect=fake_listdir),
            patch("builtins.open", side_effect=denied_open),
            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
            patch("subprocess.run") as ps_spy,
        ):
            found = gateway_mod._scan_gateway_pids(set(), all_profiles=True)

        # PermissionError swallowed — empty result, no crash
        assert 12345 not in found
        ps_spy.assert_not_called()  # /proc dir existed, so ps not called

View file

@ -1,13 +1,16 @@
"""Tests for gateway service management helpers."""
import os
import pwd
import subprocess
from pathlib import Path
from types import SimpleNamespace
import pytest
pwd = pytest.importorskip("pwd")
import hermes_cli.gateway as gateway_cli
from gateway import status
from gateway.restart import (
DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
GATEWAY_SERVICE_RESTART_EXIT_CODE,
@ -89,6 +92,13 @@ class TestSystemdServiceRefresh:
monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
calls = []
monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
monkeypatch.setattr(gateway_cli, "_recover_pending_systemd_restart", lambda system=False, previous_pid=None: False)
monkeypatch.setattr(
gateway_cli,
"_wait_for_systemd_service_restart",
lambda system=False, previous_pid=None: calls.append(("wait", system, previous_pid)) or True,
)
def fake_run(cmd, check=True, **kwargs):
calls.append(cmd)
@ -99,16 +109,218 @@ class TestSystemdServiceRefresh:
gateway_cli.systemd_restart()
assert unit_path.read_text(encoding="utf-8") == "new unit\n"
assert calls[:4] == [
assert calls[:5] == [
["systemctl", "--user", "daemon-reload"],
["systemctl", "--user", "show", gateway_cli.get_service_name(), "--no-pager", "--property", "ActiveState,SubState,Result,ExecMainStatus"],
["systemctl", "--user", "show", gateway_cli.get_service_name(), "--no-pager", "--property", "ActiveState,SubState,Result,ExecMainStatus,MainPID"],
["systemctl", "--user", "reset-failed", gateway_cli.get_service_name()],
["systemctl", "--user", "reload-or-restart", gateway_cli.get_service_name()],
["systemctl", "--user", "restart", gateway_cli.get_service_name()],
("wait", False, None),
]
def test_systemd_stop_marks_running_gateway_as_planned_stop(self, monkeypatch):
    """``systemd_stop`` writes a planned-stop marker for the live PID, then stops the unit."""
    systemctl_args = []
    marked_pids = []

    def _write_marker(pid):
        marked_pids.append(pid)
        return True

    def _ok_systemctl(args, **kwargs):
        systemctl_args.append(args)
        return SimpleNamespace(returncode=0, stdout="", stderr="")

    monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
    monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
    monkeypatch.setattr(status, "get_running_pid", lambda cleanup_stale=True: 321)
    monkeypatch.setattr(status, "write_planned_stop_marker", _write_marker)
    monkeypatch.setattr(gateway_cli, "_run_systemctl", _ok_systemctl)

    gateway_cli.systemd_stop()

    assert marked_pids == [321]
    expected_stop = ["stop", gateway_cli.get_service_name()]
    assert systemctl_args == [expected_stop]
def test_systemd_stop_timeout_prints_status_guidance(self, monkeypatch, capsys):
    """A ``TimeoutExpired`` from systemctl during stop yields guidance, not a traceback.

    The planned-stop marker must still be written for the running PID,
    and the printed output must mention the 90s wait and point at
    ``hermes gateway status``.
    """
    marked_pids = []

    def _write_marker(pid):
        marked_pids.append(pid)
        return True

    def _timing_out_systemctl(args, **kwargs):
        raise subprocess.TimeoutExpired(args, kwargs.get("timeout"))

    monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
    monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
    monkeypatch.setattr(status, "get_running_pid", lambda cleanup_stale=True: 321)
    monkeypatch.setattr(status, "write_planned_stop_marker", _write_marker)
    monkeypatch.setattr(gateway_cli, "_run_systemctl", _timing_out_systemctl)

    gateway_cli.systemd_stop()

    assert marked_pids == [321]
    printed = capsys.readouterr().out
    assert "still stopping after 90s" in printed
    assert "hermes gateway status" in printed
def test_systemd_restart_timeout_prints_status_guidance(self, monkeypatch, capsys):
    """``hermes gateway restart`` must print guidance instead of a raw TimeoutExpired traceback.

    The dashboard launches ``hermes gateway restart`` in the background.
    If a wedged adapter websocket drags the drain past the 90s CLI
    timeout, the dashboard used to display a Python traceback (issue
    #19937 follow-up: restart shares the failure mode already handled
    for stop).
    """
    gateway_cli_stubs = {
        "_select_systemd_scope": lambda system=False: False,
        "_require_service_installed": lambda action, system=False: None,
        "_preflight_user_systemd": lambda: None,
        "refresh_systemd_unit_if_needed": lambda system=False: None,
        "_systemd_main_pid": lambda system=False: None,
        "_recover_pending_systemd_restart": lambda system=False, previous_pid=None: False,
        "_systemd_service_is_start_limited": lambda system=False: False,
    }
    for attr_name, stub in gateway_cli_stubs.items():
        monkeypatch.setattr(gateway_cli, attr_name, stub)
    monkeypatch.setattr(status, "get_running_pid", lambda cleanup_stale=True: None)

    def _systemctl(args, **kwargs):
        # reset-failed is a pre-step (check=False, 30s) — let it pass.
        is_reset_failed = bool(args) and args[0] == "reset-failed"
        if not is_reset_failed:
            raise subprocess.TimeoutExpired(args, kwargs.get("timeout"))
        return SimpleNamespace(returncode=0, stdout="", stderr="")

    monkeypatch.setattr(gateway_cli, "_run_systemctl", _systemctl)

    gateway_cli.systemd_restart()

    printed = capsys.readouterr().out
    assert "still restarting after 90s" in printed
    assert "hermes gateway status" in printed
def test_run_gateway_refreshes_outdated_unit_on_boot(self, tmp_path, monkeypatch):
    """``run_gateway()`` rewrites a stale systemd unit at startup.

    This makes restart settings take effect even when the process was
    respawned through exit code 75, a path that bypasses
    ``hermes gateway restart``.
    """
    installed_unit = tmp_path / "hermes-gateway.service"
    installed_unit.write_text("old unit\n", encoding="utf-8")
    commands = []

    def _record_run(cmd, check=True, **kwargs):
        commands.append(cmd)
        return SimpleNamespace(returncode=0, stdout="", stderr="")

    # Prevent run_gateway from actually starting the gateway
    async def _noop_start_gateway(**kwargs):
        return True

    monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: installed_unit)
    monkeypatch.setattr(gateway_cli, "generate_systemd_unit", lambda system=False, run_as_user=None: "new unit\n")
    monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
    monkeypatch.setattr(gateway_cli.subprocess, "run", _record_run)
    monkeypatch.setattr("gateway.run.start_gateway", _noop_start_gateway)

    gateway_cli.run_gateway()

    assert installed_unit.read_text(encoding="utf-8") == "new unit\n"
    assert ["systemctl", "--user", "daemon-reload"] in commands
def test_refresh_refuses_to_bake_pytest_tmpdir_into_real_user_unit(
    self, tmp_path, monkeypatch
):
    """Defense in depth against tests clobbering a developer's real unit.

    ``refresh_systemd_unit_if_needed()`` runs on every ``run_gateway()``
    start. The user-scope unit path lives under ``Path.home()`` (which
    conftest does NOT sandbox), and ``generate_systemd_unit()`` bakes
    ``HERMES_HOME`` into the unit's ``Environment=`` line. Without a
    guard, any test that drives ``run_gateway()`` end-to-end on a real
    Linux dev box would silently rewrite the installed gateway unit with
    a ``/tmp/pytest-of-.../hermes_test`` HERMES_HOME and break the
    gateway on the next boot. The guard inspects the generated unit body
    for tmpdir markers and refuses the write. Tests that legitimately
    exercise the refresh flow patch ``generate_systemd_unit`` to return
    synthetic content without those markers.
    """
    installed_unit = tmp_path / "hermes-gateway.service"
    installed_unit.write_text("old unit\n", encoding="utf-8")

    # Realistic generated unit referencing a pytest tmpdir HERMES_HOME
    polluted_unit = (
        "[Service]\n"
        'Environment="HERMES_HOME=/tmp/pytest-of-alice/pytest-42/'
        'popen-gw0/test_x/hermes_test"\n'
    )

    # If the guard fails, daemon-reload would be called — record it.
    executed = []

    def _record_run(cmd, check=True, **kwargs):
        executed.append(cmd)
        return SimpleNamespace(returncode=0, stdout="", stderr="")

    monkeypatch.setattr(
        gateway_cli, "get_systemd_unit_path", lambda system=False: installed_unit
    )
    monkeypatch.setattr(
        gateway_cli,
        "generate_systemd_unit",
        lambda system=False, run_as_user=None: polluted_unit,
    )
    monkeypatch.setattr(gateway_cli.subprocess, "run", _record_run)

    outcome = gateway_cli.refresh_systemd_unit_if_needed(system=False)

    assert outcome is False, "refresh should refuse to write a polluted unit"
    current_body = installed_unit.read_text(encoding="utf-8")
    assert current_body == "old unit\n", "installed unit must be left untouched"
    reload_seen = any("daemon-reload" in str(c) for c in executed)
    assert not reload_seen, "daemon-reload must not run when write was refused"
class TestRequireServiceInstalled:
    """``_require_service_installed`` exits with a hint when the unit file is absent."""

    def test_exits_with_install_hint_when_unit_missing(self, tmp_path, monkeypatch, capsys):
        """A missing unit file causes exit code 1 and an install hint on stdout."""
        missing_unit = tmp_path / "hermes-gateway.service"
        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: missing_unit)

        with pytest.raises(SystemExit) as raised:
            gateway_cli._require_service_installed("start")

        assert raised.value.code == 1
        printed = capsys.readouterr().out
        assert "not installed" in printed
        assert "hermes gateway install" in printed

    def test_passes_when_unit_exists(self, tmp_path, monkeypatch):
        """An existing unit file lets the check return without raising."""
        present_unit = tmp_path / "hermes-gateway.service"
        present_unit.write_text("[Unit]\n", encoding="utf-8")
        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", lambda system=False: present_unit)

        gateway_cli._require_service_installed("start")
class TestGeneratedSystemdUnits:
def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self):
def _expected_timeout_stop_sec(self) -> str:
    """Return the ``TimeoutStopSec=`` line expected for the default drain timeout.

    The value is the default restart drain timeout, floored at 60, plus
    30, truncated to an integer.
    """
    drain_floor = max(60, DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT)
    timeout = int(drain_floor + 30)
    return f"TimeoutStopSec={timeout}"
def test_user_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self, monkeypatch):
monkeypatch.setattr(
gateway_cli,
"_get_restart_drain_timeout",
lambda: DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
)
unit = gateway_cli.generate_systemd_unit(system=False)
assert "ExecStart=" in unit
@ -118,7 +330,7 @@ class TestGeneratedSystemdUnits:
# TimeoutStopSec must exceed the default drain_timeout (60s) so
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
assert "TimeoutStopSec=90" in unit
assert self._expected_timeout_stop_sec() in unit
def test_user_unit_includes_resolved_node_directory_in_path(self, monkeypatch):
monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: "/home/test/.nvm/versions/node/v24.14.0/bin/node" if cmd == "node" else None)
@ -127,7 +339,49 @@ class TestGeneratedSystemdUnits:
assert "/home/test/.nvm/versions/node/v24.14.0/bin" in unit
def test_system_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self):
def test_user_unit_includes_wsl_windows_interop_paths(self, monkeypatch):
    """Under WSL the user unit carries the ``/mnt/c/...`` entries found on PATH."""
    monkeypatch.setattr(gateway_cli, "is_wsl", lambda: True)
    monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: None)
    monkeypatch.setenv(
        "PATH",
        "/usr/local/bin:/mnt/c/WINDOWS/system32:/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/",
    )

    rendered = gateway_cli.generate_systemd_unit(system=False)

    assert "/mnt/c/WINDOWS/system32" in rendered
    assert "/mnt/c/WINDOWS/System32/WindowsPowerShell/v1.0/" in rendered
def test_user_unit_omits_windows_interop_paths_outside_wsl(self, monkeypatch):
    """Outside WSL a ``/mnt/c/...`` PATH entry is not copied into the user unit."""
    monkeypatch.setattr(gateway_cli, "is_wsl", lambda: False)
    monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: None)
    monkeypatch.setenv("PATH", "/usr/local/bin:/mnt/c/WINDOWS/system32")

    rendered = gateway_cli.generate_systemd_unit(system=False)

    assert "/mnt/c/WINDOWS/system32" not in rendered
def test_system_unit_includes_wsl_windows_interop_paths(self, monkeypatch):
    """Under WSL the system-scope unit also carries the ``/mnt/c/...`` PATH entry.

    The service identity and target HERMES_HOME are stubbed so the unit
    can be generated for user ``alice`` without consulting the host.
    """

    def _alice_identity(run_as_user=None):
        return ("alice", "alice", "/home/alice")

    monkeypatch.setattr(gateway_cli, "is_wsl", lambda: True)
    monkeypatch.setattr(gateway_cli, "_system_service_identity", _alice_identity)
    monkeypatch.setattr(gateway_cli, "_hermes_home_for_target_user", lambda home: "/home/alice/.hermes")
    monkeypatch.setattr(gateway_cli.shutil, "which", lambda cmd: None)
    monkeypatch.setenv("PATH", "/usr/local/bin:/mnt/c/WINDOWS/system32")

    rendered = gateway_cli.generate_systemd_unit(system=True, run_as_user="alice")

    assert "/mnt/c/WINDOWS/system32" in rendered
def test_system_unit_avoids_recursive_execstop_and_uses_extended_stop_timeout(self, monkeypatch):
monkeypatch.setattr(
gateway_cli,
"_get_restart_drain_timeout",
lambda: DEFAULT_GATEWAY_RESTART_DRAIN_TIMEOUT,
)
unit = gateway_cli.generate_systemd_unit(system=True)
assert "ExecStart=" in unit
@ -137,7 +391,7 @@ class TestGeneratedSystemdUnits:
# TimeoutStopSec must exceed the default drain_timeout (60s) so
# systemd doesn't SIGKILL the cgroup before post-interrupt cleanup
# (tool subprocess kill, adapter disconnect) runs — issue #8202.
assert "TimeoutStopSec=90" in unit
assert self._expected_timeout_stop_sec() in unit
assert "WantedBy=multi-user.target" in unit
@ -483,64 +737,145 @@ class TestGatewayServiceDetection:
assert gateway_cli._is_service_running() is False
class TestGatewaySystemServiceRouting:
def test_systemd_restart_self_requests_graceful_restart_and_waits(self, monkeypatch, capsys):
def test_systemd_restart_gracefully_restarts_running_service_and_waits(self, monkeypatch, capsys):
calls = []
monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: calls.append(("refresh", system)))
monkeypatch.setattr(gateway_cli, "_get_restart_drain_timeout", lambda: 12.0)
monkeypatch.setattr(
"gateway.status.get_running_pid",
lambda: 654,
)
monkeypatch.setattr(
gateway_cli,
"_request_gateway_self_restart",
lambda pid: calls.append(("self", pid)) or True,
"_graceful_restart_via_sigusr1",
lambda pid, timeout: calls.append(("graceful", pid, timeout)) or True,
)
# Simulate: old process dies immediately, new process becomes active
kill_call_count = [0]
def fake_kill(pid, sig):
kill_call_count[0] += 1
if kill_call_count[0] >= 2: # first call checks, second = dead
raise ProcessLookupError()
monkeypatch.setattr(os, "kill", fake_kill)
# Simulate systemctl reset-failed/start followed by an active unit
new_pid = [None]
# Simulate systemctl reset-failed/restart followed by an active unit.
# A plain start does not break systemd's auto-restart timer once the
# old gateway has exited with the planned restart code.
def fake_subprocess_run(cmd, **kwargs):
if "reset-failed" in cmd:
calls.append(("reset-failed", cmd))
return SimpleNamespace(stdout="", returncode=0)
if "start" in cmd:
calls.append(("start", cmd))
if "restart" in cmd:
calls.append(("restart", cmd))
return SimpleNamespace(stdout="", returncode=0)
if "show" in cmd:
new_pid[0] = 999
return SimpleNamespace(
stdout="ActiveState=active\nSubState=running\nResult=success\nExecMainStatus=0\n",
returncode=0,
)
raise AssertionError(f"Unexpected systemctl call: {cmd}")
monkeypatch.setattr(gateway_cli.subprocess, "run", fake_subprocess_run)
# get_running_pid returns new PID after restart
pid_calls = [0]
def fake_get_pid():
pid_calls[0] += 1
return 999 if pid_calls[0] > 1 else 654
monkeypatch.setattr("gateway.status.get_running_pid", fake_get_pid)
monkeypatch.setattr(
gateway_cli,
"_wait_for_systemd_service_restart",
lambda system=False, previous_pid=None: calls.append(("wait", system, previous_pid)) or True,
)
gateway_cli.systemd_restart()
assert ("self", 654) in calls
assert ("graceful", 654, 17.0) in calls
assert any(call[0] == "reset-failed" for call in calls)
assert any(call[0] == "start" for call in calls)
assert any(call[0] == "restart" for call in calls)
assert ("wait", False, 654) in calls
out = capsys.readouterr().out.lower()
assert "restarted" in out
assert "restarting gracefully" in out
def test_systemd_restart_uses_systemd_main_pid_when_pid_file_is_missing(self, monkeypatch, capsys):
    """With no PID file, restart targets the ``MainPID`` reported by systemd.

    ``get_running_pid`` returns None while the unit properties report an
    active service with ``MainPID=777``. The graceful restart and the
    follow-up wait must both be addressed to PID 777. The graceful
    timeout asserted here is 15.0 against a stubbed drain timeout of
    10.0 (presumably a fixed margin added by the code under test).
    """
    events = []
    unit_properties = {
        "ActiveState": "active",
        "SubState": "running",
        "Result": "success",
        "ExecMainStatus": "0",
        "MainPID": "777",
    }

    def _graceful(pid, timeout):
        events.append(("graceful", pid, timeout))
        return True

    def _systemctl(args, **kwargs):
        events.append(args)
        return SimpleNamespace(stdout="", returncode=0)

    def _wait(system=False, previous_pid=None):
        events.append(("wait", system, previous_pid))
        return True

    monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
    monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
    monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
    monkeypatch.setattr(gateway_cli, "_get_restart_drain_timeout", lambda: 10.0)
    monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
    # Hand out a fresh dict per call, as the original inline literal did.
    monkeypatch.setattr(
        gateway_cli,
        "_read_systemd_unit_properties",
        lambda system=False: dict(unit_properties),
    )
    monkeypatch.setattr(gateway_cli, "_graceful_restart_via_sigusr1", _graceful)
    monkeypatch.setattr(gateway_cli, "_run_systemctl", _systemctl)
    monkeypatch.setattr(gateway_cli, "_wait_for_systemd_service_restart", _wait)

    gateway_cli.systemd_restart()

    assert ("graceful", 777, 15.0) in events
    assert ("wait", False, 777) in events
    printed = capsys.readouterr().out.lower()
    assert "restarting gracefully (pid 777)" in printed
def test_wait_for_systemd_restart_waits_for_runtime_running(self, monkeypatch, capsys):
    """The wait helper succeeds once the new main PID reports a running gateway.

    systemd reports an active unit with ``MainPID=999`` (different from
    the previous PID 777), the PID file lookup returns None, and the
    runtime status for any PID says ``gateway_state == "running"``.
    """
    unit_properties = {
        "ActiveState": "active",
        "SubState": "running",
        "Result": "success",
        "ExecMainStatus": "0",
        "MainPID": "999",
    }

    def _runtime_status(pid):
        return {"pid": pid, "gateway_state": "running"}

    # Hand out a fresh dict per call, as the original inline literal did.
    monkeypatch.setattr(
        gateway_cli,
        "_read_systemd_unit_properties",
        lambda system=False: dict(unit_properties),
    )
    monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
    monkeypatch.setattr(gateway_cli, "_gateway_runtime_status_for_pid", _runtime_status)

    restarted = gateway_cli._wait_for_systemd_service_restart(previous_pid=777, timeout=0.1)

    assert restarted is True
    printed = capsys.readouterr().out.lower()
    assert "restarted (pid 999)" in printed
def test_systemd_restart_reports_start_limit_hit(self, monkeypatch, capsys):
    """A ``start-limit-hit`` failure from ``systemctl restart`` is reported as rate limiting.

    ``show`` reports an inactive unit, ``reset-failed`` succeeds, and
    ``restart`` raises ``CalledProcessError`` whose stderr names
    ``start-limit-hit``. The output must mention systemd rate limiting
    and ``reset-failed``.
    """
    seen_args = []
    inactive_show = "ActiveState=inactive\nSubState=dead\nResult=success\nExecMainStatus=0\nMainPID=0\n"

    def _systemctl(args, **kwargs):
        seen_args.append(args)
        verb = args[0]
        if verb == "show":
            return SimpleNamespace(stdout=inactive_show, stderr="", returncode=0)
        if verb == "reset-failed":
            return SimpleNamespace(stdout="", stderr="", returncode=0)
        if verb == "restart":
            raise subprocess.CalledProcessError(
                1,
                ["systemctl", "--user", *args],
                stderr="Job failed. See result 'start-limit-hit'.",
            )
        raise AssertionError(f"Unexpected args: {args}")

    monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
    monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
    monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
    monkeypatch.setattr("gateway.status.get_running_pid", lambda: None)
    monkeypatch.setattr(gateway_cli, "_recover_pending_systemd_restart", lambda system=False, previous_pid=None: False)
    monkeypatch.setattr(gateway_cli, "_run_systemctl", _systemctl)

    gateway_cli.systemd_restart()

    assert ["restart", gateway_cli.get_service_name()] in seen_args
    printed = capsys.readouterr().out.lower()
    assert "rate-limited by systemd" in printed
    assert "reset-failed" in printed
def test_systemd_restart_recovers_failed_planned_restart(self, monkeypatch, capsys):
monkeypatch.setattr(gateway_cli, "_select_systemd_scope", lambda system=False: False)
monkeypatch.setattr(gateway_cli, "_require_service_installed", lambda action, system=False: None)
monkeypatch.setattr(gateway_cli, "refresh_systemd_unit_if_needed", lambda system=False: None)
monkeypatch.setattr(
"gateway.status.read_runtime_status",
@ -581,6 +916,11 @@ class TestGatewaySystemServiceRouting:
"gateway.status.get_running_pid",
lambda: 999 if started["value"] else None,
)
monkeypatch.setattr(
gateway_cli,
"_gateway_runtime_status_for_pid",
lambda pid: {"pid": pid, "gateway_state": "running"},
)
gateway_cli.systemd_restart()
@ -999,20 +1339,17 @@ class TestSystemServiceIdentityRootHandling:
def test_auto_detected_root_is_rejected(self, monkeypatch):
"""When root is auto-detected (not explicitly requested), raise."""
import pwd
import grp
monkeypatch.delenv("SUDO_USER", raising=False)
monkeypatch.setenv("USER", "root")
monkeypatch.setenv("LOGNAME", "root")
import pytest
with pytest.raises(ValueError, match="pass --run-as-user root to override"):
gateway_cli._system_service_identity(run_as_user=None)
def test_explicit_root_is_allowed(self, monkeypatch):
"""When root is explicitly passed via --run-as-user root, allow it."""
import pwd
import grp
root_info = pwd.getpwnam("root")
@ -1024,7 +1361,6 @@ class TestSystemServiceIdentityRootHandling:
def test_non_root_user_passes_through(self, monkeypatch):
"""Normal non-root user works as before."""
import pwd
import grp
monkeypatch.delenv("SUDO_USER", raising=False)
@ -2047,3 +2383,171 @@ class TestSystemdInstallOffersLegacyRemoval:
assert prompt_called["count"] == 0
assert remove_called["invoked"] is False
class TestSystemScopeRequiresRootError:
    """Tests for ``SystemScopeRequiresRootError`` replacing ``sys.exit(1)``.

    ``_require_root_for_system_service`` used to call ``sys.exit(1)``
    when non-root code attempted a system-scope systemd operation. The
    wizard's ``except Exception`` guards cannot catch ``SystemExit`` (a
    ``BaseException`` subclass), so users were dropped to a bare shell
    prompt mid-setup. Raising a typed exception instead lets the wizard
    intercept it and show actionable remediation.
    """

    def test_require_root_raises_when_non_root(self, monkeypatch):
        """Non-root callers get the typed error carrying both message and action."""
        expected_message = "System gateway start requires root. Re-run with sudo."
        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)

        with pytest.raises(gateway_cli.SystemScopeRequiresRootError) as excinfo:
            gateway_cli._require_root_for_system_service("start")

        error = excinfo.value
        assert error.args[0] == expected_message
        assert error.args[1] == "start"
        # str(e) renders only the message, not the tuple repr, so that
        # wizard format strings like f"Failed: {e}" print cleanly.
        assert str(error) == expected_message
        assert f"Failed: {error}" == "Failed: System gateway start requires root. Re-run with sudo."

    def test_require_root_noop_when_root(self, monkeypatch):
        """With euid 0 the check returns normally."""
        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 0)

        # Should not raise, should not exit
        gateway_cli._require_root_for_system_service("start")

    def test_error_is_runtime_error_subclass(self):
        """The error must be catchable by ``except Exception``.

        Wizards rely on ``except Exception`` guards, so the error has to
        be a ``RuntimeError`` and NOT a ``SystemExit`` (``BaseException``)
        for the wizard to recover from it.
        """
        error = gateway_cli.SystemScopeRequiresRootError("msg", "start")

        for catchable_base in (RuntimeError, Exception):
            assert isinstance(error, catchable_base)
        assert not isinstance(error, SystemExit)
class TestSystemScopeWizardPreCheck:
    """Tests for ``_system_scope_wizard_would_need_root``.

    The wizard uses this guard to spot the dead end BEFORE it prompts
    the user to start a service that would fail without sudo.
    """

    @staticmethod
    def _setup_units(tmp_path, monkeypatch, system_present: bool, user_present: bool):
        """Create fake system/user unit directories and point the unit-path lookup at them.

        A unit file is written into each directory only when the
        corresponding ``*_present`` flag is true.
        """
        unit_name = "hermes-gateway.service"
        system_dir = tmp_path / "sys"
        user_dir = tmp_path / "usr"
        for directory in (system_dir, user_dir):
            directory.mkdir()

        for directory, present in ((system_dir, system_present), (user_dir, user_present)):
            if present:
                (directory / unit_name).write_text("[Unit]\n")

        def _unit_path(system=False):
            return (system_dir if system else user_dir) / "hermes-gateway.service"

        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", _unit_path)

    def test_non_root_with_only_system_unit_returns_true(self, tmp_path, monkeypatch):
        """Only a system unit exists and the caller is non-root: root is needed."""
        self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=False)
        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)

        needs_root = gateway_cli._system_scope_wizard_would_need_root()

        assert needs_root is True

    def test_root_never_needs_root(self, tmp_path, monkeypatch):
        """A caller with euid 0 is never told it needs root."""
        self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=False)
        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 0)

        needs_root = gateway_cli._system_scope_wizard_would_need_root()

        assert needs_root is False

    def test_non_root_with_user_unit_present_returns_false(self, tmp_path, monkeypatch):
        """A user-scope unit is enough for a non-root caller."""
        # User-scope unit present — user can start it themselves, no sudo needed.
        self._setup_units(tmp_path, monkeypatch, system_present=True, user_present=True)
        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)

        needs_root = gateway_cli._system_scope_wizard_would_need_root()

        assert needs_root is False

    def test_non_root_with_no_units_returns_false(self, tmp_path, monkeypatch):
        """With no unit installed in either scope there is nothing requiring root."""
        self._setup_units(tmp_path, monkeypatch, system_present=False, user_present=False)
        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)

        needs_root = gateway_cli._system_scope_wizard_would_need_root()

        assert needs_root is False

    def test_non_root_with_explicit_system_arg_returns_true(self, tmp_path, monkeypatch):
        """An explicit ``system=True`` from a non-root caller always needs root."""
        # Caller passed system=True explicitly (e.g. ``hermes gateway start --system``).
        self._setup_units(tmp_path, monkeypatch, system_present=False, user_present=False)
        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)

        needs_root = gateway_cli._system_scope_wizard_would_need_root(system=True)

        assert needs_root is True
class TestSystemScopeRemediationOutput:
    """Tests for ``_print_system_scope_remediation``.

    This is the actionable guidance printed when the wizard detects a
    system-scope-only setup while running as non-root.
    """

    def test_start_remediation_mentions_sudo_systemctl_and_uninstall(self, capsys, monkeypatch):
        """The ``start`` guidance covers the sudo command, uninstall and reinstall paths."""
        monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")

        gateway_cli._print_system_scope_remediation("start")

        printed = capsys.readouterr().out
        assert "system-wide service" in printed
        assert "start requires root" in printed
        assert "sudo systemctl start hermes-gateway" in printed
        assert "sudo hermes gateway uninstall --system" in printed
        assert "hermes gateway install" in printed

    def test_restart_remediation_uses_systemctl_restart(self, capsys, monkeypatch):
        """The ``restart`` guidance names the matching systemctl verb."""
        monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")

        gateway_cli._print_system_scope_remediation("restart")

        printed = capsys.readouterr().out
        assert "restart requires root" in printed
        assert "sudo systemctl restart hermes-gateway" in printed

    def test_stop_remediation_uses_systemctl_stop(self, capsys, monkeypatch):
        """The ``stop`` guidance names the matching systemctl verb."""
        monkeypatch.setattr(gateway_cli, "get_service_name", lambda: "hermes-gateway")

        gateway_cli._print_system_scope_remediation("stop")

        printed = capsys.readouterr().out
        assert "stop requires root" in printed
        assert "sudo systemctl stop hermes-gateway" in printed
class TestGatewayCommandCatchesSystemScopeError:
    """Direct CLI path (``hermes gateway start --system`` etc.) as non-root.

    The top-level ``gateway_command`` is expected to catch
    ``SystemScopeRequiresRootError`` and turn it back into ``sys.exit(1)``
    with a clean message, preserving existing CLI behavior.
    """

    def test_non_root_system_start_exits_one_with_clean_message(self, tmp_path, monkeypatch, capsys):
        system_units = tmp_path / "sys"
        user_units = tmp_path / "usr"
        system_units.mkdir()
        user_units.mkdir()
        # Only the system-scope unit file exists on disk.
        (system_units / "hermes-gateway.service").write_text("[Unit]\n")

        def unit_path(system=False):
            base = system_units if system else user_units
            return base / "hermes-gateway.service"

        monkeypatch.setattr(gateway_cli, "get_systemd_unit_path", unit_path)
        monkeypatch.setattr(gateway_cli.os, "geteuid", lambda: 1000)
        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)
        monkeypatch.setattr(gateway_cli, "kill_gateway_processes", lambda **kw: 0)

        cli_args = SimpleNamespace(gateway_command="start", system=True, all=False)
        with pytest.raises(SystemExit) as exit_info:
            gateway_cli.gateway_command(cli_args)
        assert exit_info.value.code == 1

        printed = capsys.readouterr().out
        # The human-readable message is rendered, not the
        # ``('msg', 'action')`` tuple repr of the exception args.
        assert "System gateway start requires root. Re-run with sudo." in printed
        assert "('" not in printed  # no tuple repr leaking through

View file

@ -269,9 +269,9 @@ class TestGmiModelMetadata:
class TestGmiAuxiliary:
def test_aux_default_model(self):
from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
from agent.auxiliary_client import _get_aux_model_for_provider
assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "google/gemini-3.1-flash-lite-preview"
assert _get_aux_model_for_provider("gmi") == "google/gemini-3.1-flash-lite-preview"
def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
@ -284,6 +284,22 @@ class TestGmiAuxiliary:
assert model == "google/gemini-3.1-flash-lite-preview"
assert mock_openai.call_args.kwargs["api_key"] == "gmi-test-key"
assert mock_openai.call_args.kwargs["base_url"] == "https://api.gmi-serving.com/v1"
# GMI profile declares default_headers with a HermesAgent User-Agent
# for traffic attribution. The generic profile-fallback branch in
# resolve_provider_client should carry it through to the OpenAI client.
headers = mock_openai.call_args.kwargs.get("default_headers", {})
assert headers.get("User-Agent", "").startswith("HermesAgent/")
def test_gmi_profile_declares_hermes_user_agent(self):
    """The ``gmi`` provider profile carries a ``HermesAgent/``-prefixed User-Agent."""
    from providers import get_provider_profile

    gmi_profile = get_provider_profile("gmi")
    assert gmi_profile is not None

    user_agent = gmi_profile.default_headers.get("User-Agent", "")
    assert user_agent.startswith("HermesAgent/"), (
        f"expected GMI profile User-Agent to start with 'HermesAgent/', got {user_agent!r}"
    )
def test_resolve_provider_client_accepts_gmi_alias(self, monkeypatch):
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")

View file

@ -0,0 +1,516 @@
"""Tests for hermes_cli/goals.py — persistent cross-turn goals."""
from __future__ import annotations
import json
from unittest.mock import patch, MagicMock
import pytest
# ──────────────────────────────────────────────────────────────────────
# Fixtures
# ──────────────────────────────────────────────────────────────────────
@pytest.fixture
def hermes_home(tmp_path, monkeypatch):
    """Yield an isolated ``HERMES_HOME`` directory under ``tmp_path``.

    Creates ``<tmp_path>/.hermes``, points the ``HERMES_HOME`` environment
    variable at it, and redirects ``Path.home()`` to ``tmp_path`` so
    SessionDB.state_meta writes don't clobber the real one. The goals
    module's ``_DB_CACHE`` is cleared before and after the test so each
    test re-resolves ``HERMES_HOME``.

    Yields:
        The ``.hermes`` directory as a ``pathlib.Path``.
    """
    from pathlib import Path

    home = tmp_path / ".hermes"
    home.mkdir()
    # Wrap the stub in ``staticmethod``: a bare zero-argument lambda set on
    # the class turns into a bound method when reached through an instance
    # (``some_path.home()``) and would raise TypeError. As a staticmethod it
    # behaves the same through the class and through instances.
    monkeypatch.setattr(Path, "home", staticmethod(lambda: tmp_path))
    monkeypatch.setenv("HERMES_HOME", str(home))

    # Imported only after the environment is redirected, as in the original.
    from hermes_cli import goals

    goals._DB_CACHE.clear()
    yield home
    goals._DB_CACHE.clear()
# ──────────────────────────────────────────────────────────────────────
# _parse_judge_response
# ──────────────────────────────────────────────────────────────────────
class TestParseJudgeResponse:
    """``_parse_judge_response`` returns a 3-tuple; these tests check the first two items."""

    def test_clean_json_done(self):
        from hermes_cli.goals import _parse_judge_response

        is_done, why, _ = _parse_judge_response('{"done": true, "reason": "all good"}')
        assert is_done is True
        assert why == "all good"

    def test_clean_json_continue(self):
        from hermes_cli.goals import _parse_judge_response

        is_done, why, _ = _parse_judge_response('{"done": false, "reason": "more work needed"}')
        assert is_done is False
        assert why == "more work needed"

    def test_json_in_markdown_fence(self):
        from hermes_cli.goals import _parse_judge_response

        fenced = '```json\n{"done": true, "reason": "done"}\n```'
        is_done, why, _ = _parse_judge_response(fenced)
        assert is_done is True
        assert "done" in why

    def test_json_embedded_in_prose(self):
        """A JSON object that follows free-form reasoning text is still extracted."""
        from hermes_cli.goals import _parse_judge_response

        chatty = 'Looking at this... the agent says X. Verdict: {"done": false, "reason": "partial"}'
        is_done, why, _ = _parse_judge_response(chatty)
        assert is_done is False
        assert why == "partial"

    def test_string_done_values(self):
        from hermes_cli.goals import _parse_judge_response

        truthy_spellings = ("true", "yes", "done", "1")
        falsy_spellings = ("false", "no", "not yet")

        for spelling in truthy_spellings:
            is_done, _, _ = _parse_judge_response(f'{{"done": "{spelling}", "reason": "r"}}')
            assert is_done is True
        for spelling in falsy_spellings:
            is_done, _, _ = _parse_judge_response(f'{{"done": "{spelling}", "reason": "r"}}')
            assert is_done is False

    def test_malformed_json_fails_open(self):
        """Non-JSON input yields not-done together with a non-empty reason."""
        from hermes_cli.goals import _parse_judge_response

        is_done, why, _ = _parse_judge_response("this is not json at all")
        assert is_done is False
        assert why  # non-empty

    def test_empty_response(self):
        from hermes_cli.goals import _parse_judge_response

        is_done, why, _ = _parse_judge_response("")
        assert is_done is False
        assert why
# ──────────────────────────────────────────────────────────────────────
# judge_goal — fail-open semantics
# ──────────────────────────────────────────────────────────────────────
class TestJudgeGoal:
    """``judge_goal`` verdicts, including the fail-open ``continue`` paths."""

    # Patch target for the auxiliary-client factory used by every mocked test.
    _AUX_CLIENT_FACTORY = "agent.auxiliary_client.get_text_auxiliary_client"

    @staticmethod
    def _client_replying(content):
        """Build a mock client whose chat completion returns *content*."""
        client = MagicMock()
        client.chat.completions.create.return_value = MagicMock(
            choices=[MagicMock(message=MagicMock(content=content))]
        )
        return client

    def test_empty_goal_skipped(self):
        from hermes_cli.goals import judge_goal

        verdict, _, _ = judge_goal("", "some response")
        assert verdict == "skipped"

    def test_empty_response_continues(self):
        from hermes_cli.goals import judge_goal

        verdict, _, _ = judge_goal("ship the thing", "")
        assert verdict == "continue"

    def test_no_aux_client_continues(self):
        """Fail-open: with no aux client the verdict is continue, not skipped/done."""
        from hermes_cli import goals

        with patch(self._AUX_CLIENT_FACTORY, return_value=(None, None)):
            verdict, _, _ = goals.judge_goal("my goal", "my response")
        assert verdict == "continue"

    def test_api_error_continues(self):
        """A judge exception maps to continue, with a "judge error" reason."""
        from hermes_cli import goals

        failing_client = MagicMock()
        failing_client.chat.completions.create.side_effect = RuntimeError("boom")

        with patch(self._AUX_CLIENT_FACTORY, return_value=(failing_client, "judge-model")):
            verdict, reason, _ = goals.judge_goal("goal", "response")
        assert verdict == "continue"
        assert "judge error" in reason.lower()

    def test_judge_says_done(self):
        from hermes_cli import goals

        client = self._client_replying('{"done": true, "reason": "achieved"}')

        with patch(self._AUX_CLIENT_FACTORY, return_value=(client, "judge-model")):
            verdict, reason, _ = goals.judge_goal("goal", "agent response")
        assert verdict == "done"
        assert reason == "achieved"

    def test_judge_says_continue(self):
        from hermes_cli import goals

        client = self._client_replying('{"done": false, "reason": "not yet"}')

        with patch(self._AUX_CLIENT_FACTORY, return_value=(client, "judge-model")):
            verdict, reason, _ = goals.judge_goal("goal", "agent response")
        assert verdict == "continue"
        assert reason == "not yet"
# ──────────────────────────────────────────────────────────────────────
# GoalManager lifecycle + persistence
# ──────────────────────────────────────────────────────────────────────
class TestGoalManager:
    """Lifecycle, persistence and per-turn evaluation of ``GoalManager``.

    Every test takes the ``hermes_home`` fixture so state is written under
    an isolated ``HERMES_HOME``.
    """

    def test_no_goal_initial(self, hermes_home):
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-1")
        assert manager.state is None
        assert not manager.is_active()
        assert not manager.has_goal()
        assert "No active goal" in manager.status_line()

    def test_set_then_status(self, hermes_home):
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-2", default_max_turns=5)
        goal_state = manager.set("port the thing")

        assert goal_state.goal == "port the thing"
        assert goal_state.status == "active"
        assert goal_state.max_turns == 5
        assert goal_state.turns_used == 0
        assert manager.is_active()
        assert "active" in manager.status_line().lower()
        assert "port the thing" in manager.status_line()

    def test_set_rejects_empty(self, hermes_home):
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-3")
        with pytest.raises(ValueError):
            manager.set("")
        with pytest.raises(ValueError):
            manager.set(" ")

    def test_pause_and_resume(self, hermes_home):
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-4")
        manager.set("goal text")

        manager.pause(reason="user-paused")
        assert manager.state.status == "paused"
        assert not manager.is_active()
        assert manager.has_goal()

        manager.resume()
        assert manager.state.status == "active"
        assert manager.is_active()

    def test_clear(self, hermes_home):
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="test-sid-5")
        manager.set("goal")
        manager.clear()
        assert manager.state is None
        assert not manager.is_active()

    def test_persistence_across_managers(self, hermes_home):
        """Key invariant: a second manager on the same session sees the goal.

        This is what makes /resume work — each session rebinds its
        GoalManager and picks up the saved state.
        """
        from hermes_cli.goals import GoalManager

        writer = GoalManager(session_id="persist-sid")
        writer.set("do the thing")

        reader = GoalManager(session_id="persist-sid")
        assert reader.state is not None
        assert reader.state.goal == "do the thing"
        assert reader.is_active()

    def test_evaluate_after_turn_done(self, hermes_home):
        """A ``done`` verdict sets status to done and produces no continuation."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="eval-sid-1")
        manager.set("ship it")

        with patch.object(goals, "judge_goal", return_value=("done", "shipped", False)):
            outcome = manager.evaluate_after_turn("I shipped the feature.")

        assert outcome["verdict"] == "done"
        assert outcome["should_continue"] is False
        assert outcome["continuation_prompt"] is None
        assert manager.state.status == "done"
        assert manager.state.turns_used == 1

    def test_evaluate_after_turn_continue_under_budget(self, hermes_home):
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="eval-sid-2", default_max_turns=5)
        manager.set("a long goal")

        with patch.object(goals, "judge_goal", return_value=("continue", "more work", False)):
            outcome = manager.evaluate_after_turn("made some progress")

        assert outcome["verdict"] == "continue"
        assert outcome["should_continue"] is True
        assert outcome["continuation_prompt"] is not None
        assert "a long goal" in outcome["continuation_prompt"]
        assert manager.state.status == "active"
        assert manager.state.turns_used == 1

    def test_evaluate_after_turn_budget_exhausted(self, hermes_home):
        """Reaching ``max_turns`` pauses the goal instead of continuing."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="eval-sid-3", default_max_turns=2)
        manager.set("hard goal")

        with patch.object(goals, "judge_goal", return_value=("continue", "not yet", False)):
            first = manager.evaluate_after_turn("step 1")
            assert first["should_continue"] is True
            assert manager.state.turns_used == 1
            assert manager.state.status == "active"

            second = manager.evaluate_after_turn("step 2")
            # turns_used is now 2, which equals max_turns → paused
            assert second["should_continue"] is False
            assert manager.state.status == "paused"
            assert manager.state.turns_used == 2
            assert "budget" in (manager.state.paused_reason or "").lower()

    def test_evaluate_after_turn_inactive(self, hermes_home):
        """``evaluate_after_turn`` reports ``inactive`` with no goal or a paused goal."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="eval-sid-4")

        without_goal = manager.evaluate_after_turn("anything")
        assert without_goal["verdict"] == "inactive"
        assert without_goal["should_continue"] is False

        manager.set("a goal")
        manager.pause()
        while_paused = manager.evaluate_after_turn("anything")
        assert while_paused["verdict"] == "inactive"
        assert while_paused["should_continue"] is False

    def test_continuation_prompt_shape(self, hermes_home):
        """The continuation prompt is non-empty and contains the goal text verbatim."""
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="cont-sid")
        manager.set("port goal command to hermes")

        continuation = manager.next_continuation_prompt()
        assert continuation is not None
        assert "port goal command to hermes" in continuation
        assert continuation.strip()  # non-empty
# ──────────────────────────────────────────────────────────────────────
# Smoke: CommandDef is wired
# ──────────────────────────────────────────────────────────────────────
def test_goal_command_in_registry():
    """``resolve_command("goal")`` returns a command whose name is ``goal``."""
    from hermes_cli.commands import resolve_command

    resolved = resolve_command("goal")
    assert resolved is not None
    assert resolved.name == "goal"
def test_goal_command_dispatches_in_cli_registry_helpers():
    """``/goal`` is listed in ``COMMANDS`` and under the ``Session`` category."""
    from hermes_cli.commands import COMMANDS, COMMANDS_BY_CATEGORY

    assert "/goal" in COMMANDS
    session_category = COMMANDS_BY_CATEGORY.get("Session", {})
    assert "/goal" in session_category
# ──────────────────────────────────────────────────────────────────────
# Auto-pause on consecutive judge parse failures
# ──────────────────────────────────────────────────────────────────────
class TestJudgeParseFailureAutoPause:
    """Regression tests for auto-pausing on consecutive judge parse failures.

    Weak judge models (e.g. deepseek-v4-flash) that return empty strings or
    non-JSON prose must pause the loop after N turns rather than burn the
    whole turn budget.
    """

    # Patch target for the auxiliary-client factory.
    _AUX_CLIENT_FACTORY = "agent.auxiliary_client.get_text_auxiliary_client"

    def test_parse_response_flags_empty_as_parse_failure(self):
        from hermes_cli.goals import _parse_judge_response

        is_done, why, failed_to_parse = _parse_judge_response("")
        assert is_done is False
        assert failed_to_parse is True
        assert "empty" in why.lower()

    def test_parse_response_flags_non_json_as_parse_failure(self):
        from hermes_cli.goals import _parse_judge_response

        prose_only = (
            "Let me analyze whether the goal is fully satisfied based on the agent's response..."
        )
        is_done, why, failed_to_parse = _parse_judge_response(prose_only)
        assert is_done is False
        assert failed_to_parse is True
        assert "not json" in why.lower()

    def test_parse_response_clean_json_is_not_parse_failure(self):
        from hermes_cli.goals import _parse_judge_response

        is_done, _, failed_to_parse = _parse_judge_response(
            '{"done": false, "reason": "more work"}'
        )
        assert is_done is False
        assert failed_to_parse is False

    def test_api_error_does_not_count_as_parse_failure(self):
        """An exception raised by the client is not reported as a parse failure."""
        from hermes_cli import goals

        failing_client = MagicMock()
        failing_client.chat.completions.create.side_effect = RuntimeError("connection reset")

        with patch(self._AUX_CLIENT_FACTORY, return_value=(failing_client, "judge-model")):
            verdict, _, failed_to_parse = goals.judge_goal("goal", "response")
        assert verdict == "continue"
        assert failed_to_parse is False

    def test_empty_judge_reply_flagged_as_parse_failure(self):
        """End-to-end: empty judge content yields ``parse_failed=True``."""
        from hermes_cli import goals

        silent_client = MagicMock()
        silent_client.chat.completions.create.return_value = MagicMock(
            choices=[MagicMock(message=MagicMock(content=""))]
        )

        with patch(self._AUX_CLIENT_FACTORY, return_value=(silent_client, "judge-model")):
            verdict, _, failed_to_parse = goals.judge_goal("goal", "response")
        assert verdict == "continue"
        assert failed_to_parse is True

    def test_auto_pause_after_three_consecutive_parse_failures(self, hermes_home):
        """Three consecutive parse failures pause the goal with a config pointer."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager, DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES

        assert DEFAULT_MAX_CONSECUTIVE_PARSE_FAILURES == 3

        manager = GoalManager(session_id="parse-fail-sid-1", default_max_turns=20)
        manager.set("do a thing")

        unparseable = ("continue", "judge returned empty response", True)
        with patch.object(goals, "judge_goal", return_value=unparseable):
            first = manager.evaluate_after_turn("step 1")
            assert first["should_continue"] is True
            assert manager.state.consecutive_parse_failures == 1

            second = manager.evaluate_after_turn("step 2")
            assert second["should_continue"] is True
            assert manager.state.consecutive_parse_failures == 2

            third = manager.evaluate_after_turn("step 3")
            assert third["should_continue"] is False
            assert third["status"] == "paused"
            assert manager.state.consecutive_parse_failures == 3
            # The message names the config surface so the user can fix it.
            assert "auxiliary" in third["message"]
            assert "goal_judge" in third["message"]
            assert "config.yaml" in third["message"]

    def test_parse_failure_counter_resets_on_good_reply(self, hermes_home):
        """One parseable judge reply resets the consecutive-failure counter."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="parse-fail-sid-2", default_max_turns=20)
        manager.set("another goal")

        # Two parse failures…
        with patch.object(goals, "judge_goal", return_value=("continue", "not json", True)):
            manager.evaluate_after_turn("step 1")
            manager.evaluate_after_turn("step 2")
        assert manager.state.consecutive_parse_failures == 2

        # …then one clean reply resets the counter.
        with patch.object(
            goals, "judge_goal", return_value=("continue", "making progress", False)
        ):
            outcome = manager.evaluate_after_turn("step 3")
        assert outcome["should_continue"] is True
        assert manager.state.consecutive_parse_failures == 0

    def test_parse_failure_counter_not_incremented_by_api_errors(self, hermes_home):
        """Verdicts with ``parse_failed=False`` never advance the failure counter."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager

        manager = GoalManager(session_id="parse-fail-sid-3", default_max_turns=20)
        manager.set("goal")

        api_error = ("continue", "judge error: RuntimeError", False)
        with patch.object(goals, "judge_goal", return_value=api_error):
            for _ in range(5):
                outcome = manager.evaluate_after_turn("still going")
                assert outcome["should_continue"] is True
        assert manager.state.consecutive_parse_failures == 0
        assert manager.state.status == "active"

    def test_consecutive_parse_failures_persists_across_goalmanager_reloads(
        self, hermes_home
    ):
        """The counter is visible when the goal is loaded again via ``load_goal``."""
        from hermes_cli import goals
        from hermes_cli.goals import GoalManager, load_goal

        manager = GoalManager(session_id="parse-fail-sid-4", default_max_turns=20)
        manager.set("persistent goal")

        with patch.object(goals, "judge_goal", return_value=("continue", "empty", True)):
            manager.evaluate_after_turn("r")
            manager.evaluate_after_turn("r")

        stored = load_goal("parse-fail-sid-4")
        assert stored is not None
        assert stored.consecutive_parse_failures == 2

View file

@ -0,0 +1,492 @@
"""Tests for the multi-board kanban layer (``hermes kanban boards …``).
Covers the pieces added when boards became a first-class concept:
* Slug validation and normalisation.
* Path resolution for ``default`` (legacy ``<root>/kanban.db``) vs
named boards (``<root>/kanban/boards/<slug>/kanban.db``).
* Current-board persistence via ``<root>/kanban/current`` and
``HERMES_KANBAN_BOARD`` env var.
* ``connect(board=)`` isolation — writes on one board don't leak.
* ``create_board`` / ``list_boards`` / ``remove_board`` round trip.
* CLI surface: ``hermes kanban boards list/create/switch/rm``.
* ``_default_spawn`` injects ``HERMES_KANBAN_BOARD`` into worker env.
"""
from __future__ import annotations
import json
import os
import subprocess
import sys
from pathlib import Path
import pytest
# Ensure the worktree (not the stale global clone) is first on sys.path.
_WORKTREE = Path(__file__).resolve().parents[2]
if str(_WORKTREE) not in sys.path:
sys.path.insert(0, str(_WORKTREE))
from hermes_cli import kanban_db as kb
# ---------------------------------------------------------------------------
# Fixture
# ---------------------------------------------------------------------------
@pytest.fixture
def fresh_home(tmp_path, monkeypatch):
    """Return an isolated ``HERMES_HOME`` with no prior kanban state.

    Creates ``<tmp_path>/hermes_home``, exports it as ``HERMES_HOME``,
    removes the kanban override environment variables, and resets the
    module-level caches so each test sees a truly empty board set.
    """
    root = tmp_path / "hermes_home"
    root.mkdir()
    monkeypatch.setenv("HERMES_HOME", str(root))

    kanban_overrides = (
        "HERMES_KANBAN_DB",
        "HERMES_KANBAN_WORKSPACES_ROOT",
        "HERMES_KANBAN_HOME",
        "HERMES_KANBAN_BOARD",
    )
    for variable in kanban_overrides:
        monkeypatch.delenv(variable, raising=False)

    # Best-effort reset of the hermes_constants cache so
    # get_default_hermes_root() re-reads; any failure here is ignored.
    try:
        import hermes_constants

        hermes_constants._cached_default_hermes_root = None  # type: ignore[attr-defined]
    except Exception:
        pass

    # The kanban module-level init cache must not leak between tests.
    kb._INITIALIZED_PATHS.clear()
    return root
# ---------------------------------------------------------------------------
# Slug validation
# ---------------------------------------------------------------------------
class TestSlugValidation:
    """Accepted, rejected, empty and mixed-case inputs to ``_normalize_board_slug``."""

    @pytest.mark.parametrize(
        "good",
        [
            "default",
            "atm10-server",
            "hermes-agent",
            "proj_1",
            "a",
            "very-long-but-still-ok-slug-with-hyphens-and-numbers-1234",
        ],
    )
    def test_accepts_valid(self, good):
        normalized = kb._normalize_board_slug(good)
        assert normalized == good

    @pytest.mark.parametrize(
        "bad",
        [
            "-leading-hyphen",
            "_leading_underscore",
            "with/slash",
            "with space",
            "has.dot",
            "has?question",
            "..",
            "../etc",
            "foo\x00bar",
        ],
    )
    def test_rejects_invalid(self, bad):
        with pytest.raises(ValueError):
            kb._normalize_board_slug(bad)

    def test_empty_returns_none(self):
        assert kb._normalize_board_slug(None) is None
        assert kb._normalize_board_slug("") is None
        assert kb._normalize_board_slug(" ") is None

    def test_auto_lowercases(self):
        # Uppercase input is lowered rather than rejected: ``Default`` →
        # ``default``, ``ATM10-Server`` → ``atm10-server``.
        assert kb._normalize_board_slug("Default") == "default"
        assert kb._normalize_board_slug("ATM10-Server") == "atm10-server"
# ---------------------------------------------------------------------------
# Path resolution
# ---------------------------------------------------------------------------
class TestPathResolution:
    """Where the DB, workspaces and logs live for the default and named boards."""

    def test_default_board_legacy_path(self, fresh_home):
        """The default board's DB lives at ``<root>/kanban.db`` for back-compat."""
        legacy_db = fresh_home / "kanban.db"
        assert kb.kanban_db_path() == legacy_db
        assert kb.kanban_db_path(board="default") == legacy_db

    def test_named_board_under_boards_dir(self, fresh_home):
        resolved = kb.kanban_db_path(board="atm10-server")
        expected = fresh_home / "kanban" / "boards" / "atm10-server" / "kanban.db"
        assert resolved == expected

    def test_workspaces_per_board(self, fresh_home):
        kanban_root = fresh_home / "kanban"
        assert kb.workspaces_root() == kanban_root / "workspaces"
        # Mixed-case input resolves to the lowercase on-disk slug.
        assert kb.workspaces_root(board="projA") == (
            kanban_root / "boards" / "proja" / "workspaces"
        )

    def test_logs_per_board(self, fresh_home):
        kanban_root = fresh_home / "kanban"
        assert kb.worker_logs_dir() == kanban_root / "logs"
        assert kb.worker_logs_dir(board="other") == (
            kanban_root / "boards" / "other" / "logs"
        )

    def test_env_var_db_override_still_wins(self, fresh_home, tmp_path, monkeypatch):
        """``HERMES_KANBAN_DB`` pins the file regardless of the ``board=`` argument."""
        pinned_db = tmp_path / "custom.db"
        monkeypatch.setenv("HERMES_KANBAN_DB", str(pinned_db))
        assert kb.kanban_db_path() == pinned_db
        assert kb.kanban_db_path(board="ignored") == pinned_db

    def test_env_var_workspaces_override(self, fresh_home, tmp_path, monkeypatch):
        pinned_ws = tmp_path / "ws"
        monkeypatch.setenv("HERMES_KANBAN_WORKSPACES_ROOT", str(pinned_ws))
        assert kb.workspaces_root(board="any") == pinned_ws
# ---------------------------------------------------------------------------
# Current-board resolution
# ---------------------------------------------------------------------------
class TestCurrentBoard:
    """Resolution of the current board from the env var, the on-disk pointer, or the default."""

    def test_default_when_unset(self, fresh_home):
        assert kb.get_current_board() == "default"

    def test_env_var_takes_precedence(self, fresh_home, monkeypatch):
        # The board is created first so the env-var value names a real
        # board; the point of the test is only that the env var wins.
        kb.create_board("envboard")
        monkeypatch.setenv("HERMES_KANBAN_BOARD", "envboard")
        assert kb.get_current_board() == "envboard"

    def test_file_pointer_honoured(self, fresh_home):
        kb.create_board("filepick")
        kb.set_current_board("filepick")
        assert kb.get_current_board() == "filepick"

    def test_stale_file_pointer_falls_back_to_default(self, fresh_home):
        # Hand-write a pointer file that names a board which was never created.
        pointer = fresh_home / "kanban" / "current"
        pointer.parent.mkdir(parents=True, exist_ok=True)
        pointer.write_text("missing-board\n", encoding="utf-8")

        assert kb.get_current_board() == "default"
        assert not kb.board_exists("missing-board")
        listed_slugs = [board["slug"] for board in kb.list_boards()]
        assert listed_slugs == ["default"]

    def test_env_beats_file(self, fresh_home, monkeypatch):
        kb.create_board("a")
        kb.create_board("b")
        kb.set_current_board("a")
        monkeypatch.setenv("HERMES_KANBAN_BOARD", "b")
        assert kb.get_current_board() == "b"

    def test_invalid_env_falls_through(self, fresh_home, monkeypatch):
        monkeypatch.setenv("HERMES_KANBAN_BOARD", "!!bad!!")
        # Must not crash; resolution falls through to the default board.
        assert kb.get_current_board() == "default"

    def test_clear_current_board(self, fresh_home):
        kb.create_board("x")
        kb.set_current_board("x")
        kb.clear_current_board()
        assert kb.get_current_board() == "default"

    def test_kanban_db_path_reads_current(self, fresh_home):
        """``kanban_db_path()`` with no arguments respects the on-disk pointer."""
        kb.create_board("my-proj")
        kb.set_current_board("my-proj")
        board_db = fresh_home / "kanban" / "boards" / "my-proj" / "kanban.db"
        assert kb.kanban_db_path() == board_db
# ---------------------------------------------------------------------------
# Board CRUD
# ---------------------------------------------------------------------------
class TestBoardCRUD:
    """Create / list / remove round trips and metadata writes for boards."""

    @staticmethod
    def _slugs():
        """Return the slugs reported by ``kb.list_boards()``, in order."""
        return [board["slug"] for board in kb.list_boards()]

    def test_create_and_list(self, fresh_home):
        assert self._slugs() == ["default"]
        kb.create_board("foo", name="Foo Board", description="test")
        assert self._slugs() == ["default", "foo"]

    def test_create_is_idempotent(self, fresh_home):
        kb.create_board("bar")
        kb.create_board("bar")  # second call must not raise
        assert self._slugs() == ["default", "bar"]

    def test_create_writes_metadata(self, fresh_home):
        created = kb.create_board(
            "baz",
            name="Baz",
            description="desc",
            icon="📦",
            color="#abcdef",
        )
        assert created["slug"] == "baz"
        assert created["name"] == "Baz"
        assert created["icon"] == "📦"

        # Round-trip via read_board_metadata.
        reread = kb.read_board_metadata("baz")
        assert reread["name"] == "Baz"
        assert reread["description"] == "desc"
        assert reread["icon"] == "📦"

    def test_remove_archive(self, fresh_home):
        kb.create_board("toremove")
        result = kb.remove_board("toremove")
        assert result["action"] == "archived"
        assert Path(result["new_path"]).exists()
        assert "toremove" not in self._slugs()

    def test_remove_hard_delete(self, fresh_home):
        kb.create_board("nuke")
        board_directory = kb.board_dir("nuke")
        assert board_directory.exists()

        result = kb.remove_board("nuke", archive=False)
        assert result["action"] == "deleted"
        assert not board_directory.exists()

    def test_remove_default_forbidden(self, fresh_home):
        with pytest.raises(ValueError, match="default"):
            kb.remove_board("default")

    def test_remove_nonexistent_raises(self, fresh_home):
        with pytest.raises(ValueError, match="does not exist"):
            kb.remove_board("nosuch")

    def test_remove_clears_current_pointer(self, fresh_home):
        kb.create_board("pinned")
        kb.set_current_board("pinned")
        kb.remove_board("pinned")
        assert kb.get_current_board() == "default"

    def test_rename_updates_metadata(self, fresh_home):
        kb.create_board("slug-immutable")
        kb.write_board_metadata("slug-immutable", name="New Display Name")
        stored_name = kb.read_board_metadata("slug-immutable")["name"]
        assert stored_name == "New Display Name"
        # The slug itself must not change.
        assert kb.board_exists("slug-immutable")
# ---------------------------------------------------------------------------
# Connection isolation
# ---------------------------------------------------------------------------
class TestConnectionIsolation:
    """Tasks written through ``kb.connect`` stay on the board they were written to."""

    def test_tasks_do_not_leak_across_boards(self, fresh_home):
        kb.create_board("alpha")
        kb.create_board("beta")

        with kb.connect(board="alpha") as conn:
            kb.create_task(conn, title="alpha-task-1", assignee="dev")
            kb.create_task(conn, title="alpha-task-2", assignee="dev")
        with kb.connect(board="beta") as conn:
            kb.create_task(conn, title="beta-only", assignee="dev")

        with kb.connect(board="alpha") as conn:
            alpha_tasks = kb.list_tasks(conn)
        with kb.connect(board="beta") as conn:
            beta_tasks = kb.list_tasks(conn)
        with kb.connect(board="default") as conn:
            default_tasks = kb.list_tasks(conn)

        assert {task.title for task in alpha_tasks} == {"alpha-task-1", "alpha-task-2"}
        assert {task.title for task in beta_tasks} == {"beta-only"}
        assert default_tasks == []

    def test_connect_without_args_uses_current(self, fresh_home):
        kb.create_board("curr")
        kb.set_current_board("curr")

        with kb.connect() as conn:
            kb.create_task(conn, title="implicit", assignee="x")
        with kb.connect(board="curr") as conn:
            found = kb.list_tasks(conn)

        assert [task.title for task in found] == ["implicit"]

    def test_connect_env_var_overrides_current(self, fresh_home, monkeypatch):
        kb.create_board("persist")
        kb.create_board("envwin")
        kb.set_current_board("persist")
        monkeypatch.setenv("HERMES_KANBAN_BOARD", "envwin")

        with kb.connect() as conn:
            kb.create_task(conn, title="via-env", assignee="x")

        with kb.connect(board="envwin") as conn:
            assert [task.title for task in kb.list_tasks(conn)] == ["via-env"]
        with kb.connect(board="persist") as conn:
            assert kb.list_tasks(conn) == []
# ---------------------------------------------------------------------------
# Worker spawn env injection
# ---------------------------------------------------------------------------
class TestWorkerSpawnEnv:
    """Checks the environment that ``kb._default_spawn`` hands to the child process.

    ``subprocess.Popen`` is monkey-patched so the child env can be captured
    without actually spawning anything.
    """

    def test_default_spawn_sets_env_vars(self, fresh_home, monkeypatch):
        seen = {}

        class StubProcess:
            pid = 12345

        def recording_popen(cmd, *args, **kwargs):
            seen["cmd"] = cmd
            seen["env"] = kwargs.get("env", {})
            return StubProcess()

        monkeypatch.setattr(subprocess, "Popen", recording_popen)
        kb.create_board("spawntest")

        task_fields = dict(
            id="t_abc",
            title="worker test",
            body=None,
            assignee="teknium",
            status="ready",
            priority=0,
            created_by="user",
            created_at=0,
            started_at=None,
            completed_at=None,
            workspace_kind="scratch",
            workspace_path=None,
            claim_lock=None,
            claim_expires=None,
            tenant=None,
        )
        task = kb.Task(**task_fields)
        kb._default_spawn(task, str(fresh_home / "ws"), board="spawntest")

        child_env = seen["env"]
        assert child_env["HERMES_KANBAN_BOARD"] == "spawntest"
        assert child_env["HERMES_KANBAN_TASK"] == "t_abc"

        # The DB path must be the per-board DB, not the legacy default.
        board_root = fresh_home / "kanban" / "boards" / "spawntest"
        assert child_env["HERMES_KANBAN_DB"] == str(board_root / "kanban.db")
        assert child_env["HERMES_KANBAN_WORKSPACES_ROOT"] == str(board_root / "workspaces")

    def test_default_board_spawn_keeps_legacy_paths(self, fresh_home, monkeypatch):
        seen = {}

        class StubProcess:
            pid = 1

        def recording_popen(cmd, *args, **kwargs):
            seen["env"] = kwargs.get("env", {})
            return StubProcess()

        monkeypatch.setattr(subprocess, "Popen", recording_popen)

        task_fields = dict(
            id="t_def",
            title="",
            body=None,
            assignee="teknium",
            status="ready",
            priority=0,
            created_by=None,
            created_at=0,
            started_at=None,
            completed_at=None,
            workspace_kind="scratch",
            workspace_path=None,
            claim_lock=None,
            claim_expires=None,
            tenant=None,
        )
        task = kb.Task(**task_fields)
        kb._default_spawn(task, str(fresh_home / "ws"), board=None)

        child_env = seen["env"]
        assert child_env["HERMES_KANBAN_BOARD"] == "default"
        assert child_env["HERMES_KANBAN_DB"] == str(fresh_home / "kanban.db")
# ---------------------------------------------------------------------------
# CLI surface
# ---------------------------------------------------------------------------
def _cli(args: list[str], env_extra: dict | None = None) -> subprocess.CompletedProcess:
    """Run ``hermes kanban …`` in a child process with PYTHONPATH pinned to the worktree.

    The child inherits the current environment, except that the kanban
    selectors a dispatcher pins on its workers are dropped first. Without
    that scrub, running the suite from inside a kanban worker (or a shell
    that exports ``HERMES_KANBAN_BOARD``) could steer the CLI at a board
    other than the one under the ``HERMES_HOME`` supplied by the test.

    Args:
        args: Arguments appended after ``hermes_cli.main kanban``.
        env_extra: Variables applied on top of the scrubbed environment.
            These always win, so a test can still set any kanban variable
            explicitly.

    Returns:
        The completed process, with ``stdout``/``stderr`` captured as text.

    Raises:
        subprocess.TimeoutExpired: If the command runs longer than 30 seconds.
    """
    # Variables the dispatcher injects into worker environments; they must
    # not leak from the test runner's own environment into the CLI under test.
    inherited_selectors = (
        "HERMES_KANBAN_BOARD",
        "HERMES_KANBAN_DB",
        "HERMES_KANBAN_WORKSPACES_ROOT",
        "HERMES_KANBAN_TASK",
    )
    env = {
        key: value
        for key, value in os.environ.items()
        if key not in inherited_selectors
    }
    env["PYTHONPATH"] = str(_WORKTREE)
    if env_extra:
        env.update(env_extra)
    return subprocess.run(
        [sys.executable, "-m", "hermes_cli.main", "kanban"] + args,
        env=env,
        capture_output=True,
        text=True,
        cwd=str(_WORKTREE),
        timeout=30,
    )
class TestCLI:
    """End-to-end checks of the ``hermes kanban`` board commands via a subprocess.

    Every test points ``HERMES_HOME`` at its own ``tmp_path``. Each CLI call
    whose output is parsed has its return code asserted first, so a failing
    command reports its stderr instead of an opaque JSON decode error.
    """

    def test_boards_list_default_only(self, tmp_path):
        """A fresh home lists only the ``default`` board, marked current."""
        env = {"HERMES_HOME": str(tmp_path)}
        res = _cli(["boards", "list", "--json"], env_extra=env)
        assert res.returncode == 0, res.stderr
        data = json.loads(res.stdout)
        slugs = [b["slug"] for b in data]
        assert slugs == ["default"]
        assert data[0]["is_current"] is True

    def test_boards_create_and_switch(self, tmp_path):
        """``boards create --switch`` creates the board and makes it current."""
        env = {"HERMES_HOME": str(tmp_path)}
        r1 = _cli(
            ["boards", "create", "myproj", "--name", "My Project", "--switch"],
            env_extra=env,
        )
        assert r1.returncode == 0, r1.stderr
        assert "created" in r1.stdout
        assert "Switched" in r1.stdout

        r2 = _cli(["boards", "list", "--json"], env_extra=env)
        assert r2.returncode == 0, r2.stderr
        current = [b["slug"] for b in json.loads(r2.stdout) if b["is_current"]]
        assert current, "no board is marked current"
        assert current[0] == "myproj"

    def test_per_board_task_isolation_via_cli(self, tmp_path):
        """Tasks created with ``--board`` are visible only on that board."""
        env = {"HERMES_HOME": str(tmp_path)}
        assert _cli(["boards", "create", "projA"], env_extra=env).returncode == 0
        assert _cli(["boards", "create", "projB"], env_extra=env).returncode == 0

        # Create one task on each board via --board.
        r = _cli(["--board", "projA", "create", "Task A", "--assignee", "dev"], env_extra=env)
        assert r.returncode == 0, r.stderr
        r = _cli(["--board", "projB", "create", "Task B", "--assignee", "dev"], env_extra=env)
        assert r.returncode == 0, r.stderr

        # Listing each board (and the implicit default) shows only its own tasks.
        listings = (
            ("projA", ["--board", "projA", "list", "--json"]),
            ("projB", ["--board", "projB", "list", "--json"]),
            ("default", ["list", "--json"]),
        )
        titles = {}
        for label, argv in listings:
            res = _cli(argv, env_extra=env)
            assert res.returncode == 0, res.stderr
            titles[label] = [t["title"] for t in json.loads(res.stdout)]
        assert titles["projA"] == ["Task A"]
        assert titles["projB"] == ["Task B"]
        assert titles["default"] == []

    def test_board_flag_rejects_unknown(self, tmp_path):
        """An unknown ``--board`` slug produces a stderr error message."""
        env = {"HERMES_HOME": str(tmp_path)}
        r = _cli(["--board", "ghost", "list"], env_extra=env)
        # main.py's dispatcher doesn't propagate return codes today, so we
        # assert the user-visible signal: a stderr error message. Whether
        # the exit code stays 0 is a separate (pre-existing) issue.
        assert "does not exist" in r.stderr

    def test_boards_rm_archives(self, tmp_path):
        """``boards rm`` archives the board and hides it from the default list."""
        env = {"HERMES_HOME": str(tmp_path)}
        created = _cli(["boards", "create", "rmme"], env_extra=env)
        assert created.returncode == 0, created.stderr

        r = _cli(["boards", "rm", "rmme"], env_extra=env)
        assert r.returncode == 0, r.stderr
        assert "archived" in r.stdout

        # The default board list no longer shows it.
        res = _cli(["boards", "list", "--json"], env_extra=env)
        assert res.returncode == 0, res.stderr
        slugs = [b["slug"] for b in json.loads(res.stdout)]
        assert "rmme" not in slugs

View file

@ -0,0 +1,404 @@
"""Tests for the kanban CLI surface (hermes_cli.kanban)."""
from __future__ import annotations
import argparse
import json
import os
from pathlib import Path
import pytest
from hermes_cli import kanban as kc
from hermes_cli import kanban_db as kb
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated ``.hermes`` home with an initialised kanban DB.

    ``HERMES_HOME`` and ``Path.home`` are both redirected under ``tmp_path``
    before ``kb.init_db()`` runs. Returns the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    kb.init_db()
    return hermes_dir
# ---------------------------------------------------------------------------
# Workspace flag parsing
# ---------------------------------------------------------------------------
@pytest.mark.parametrize(
    ("value", "expected"),
    [
        ("scratch", ("scratch", None)),
        ("worktree", ("worktree", None)),
        ("dir:/tmp/work", ("dir", "/tmp/work")),
    ],
)
def test_parse_workspace_flag_valid(value, expected):
    """Each well-formed flag value parses to its ``(kind, path)`` pair."""
    parsed = kc._parse_workspace_flag(value)
    assert parsed == expected
def test_parse_workspace_flag_expands_user():
    """A ``dir:~/…`` value comes back with the leading ``~`` expanded."""
    parsed = kc._parse_workspace_flag("dir:~/vault")
    kind, path = parsed
    assert kind == "dir"
    assert not path.startswith("~")
    assert path.endswith("/vault")
@pytest.mark.parametrize("bad", ["cloud", "dir:", "", "worktree:/x"])
def test_parse_workspace_flag_rejects(bad):
    """Malformed values raise ``ArgumentTypeError``; empty falls back to scratch."""
    if bad:
        with pytest.raises(argparse.ArgumentTypeError):
            kc._parse_workspace_flag(bad)
    else:
        # The empty string is not an error: it selects the default workspace.
        assert kc._parse_workspace_flag(bad) == ("scratch", None)
# ---------------------------------------------------------------------------
# run_slash smoke tests (end-to-end via the same entry both CLI and gateway use)
# ---------------------------------------------------------------------------
def test_run_slash_no_args_shows_usage(kanban_home):
    """An empty slash command returns text that mentions kanban and its actions."""
    lowered = kc.run_slash("").lower()
    assert "kanban" in lowered
    assert any(word in lowered for word in ("create", "subcommand", "action"))
def test_run_slash_create_and_list(kanban_home):
    """A created task shows up in ``list`` with its title and assignee."""
    created = kc.run_slash("create 'ship feature' --assignee alice")
    assert "Created" in created

    listing = kc.run_slash("list")
    for needle in ("ship feature", "alice"):
        assert needle in listing
def test_run_slash_create_with_parent_and_cascade(kanban_home):
    """Completing a parent promotes its ``--parent`` child from todo to ready."""
    import re

    parent_out = kc.run_slash("create 'parent' --assignee alice")
    # The creation message embeds the new id, e.g. "Created t_xxxx (ready, ...)".
    found = re.search(r"(t_[a-f0-9]+)", parent_out)
    assert found
    parent_id = found.group(1)

    child_out = kc.run_slash(f"create 'child' --assignee bob --parent {parent_id}")
    assert "todo" in child_out  # the child waits on its parent

    # Once the parent completes, the child must appear under the ready filter.
    kc.run_slash(f"complete {parent_id}")
    assert "child" in kc.run_slash("list --status ready")
def test_run_slash_show_includes_comments(kanban_home):
    """``show`` output contains comments previously added to the task."""
    import re

    created = kc.run_slash("create 'x'")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)
    kc.run_slash(f"comment {task_id} 'source is paywalled'")
    assert "source is paywalled" in kc.run_slash(f"show {task_id}")
def test_run_slash_block_unblock_cycle(kanban_home):
    """A claimed task can be blocked and then unblocked through the slash surface."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)

    # Claim first so that block finds the task in the running state.
    kc.run_slash(f"claim {task_id}")
    blocked = kc.run_slash(f"block {task_id} 'need decision'")
    assert "Blocked" in blocked
    unblocked = kc.run_slash(f"unblock {task_id}")
    assert "Unblocked" in unblocked
def test_run_slash_json_output(kanban_home):
    """``create --json`` emits a JSON object describing the new task."""
    raw = kc.run_slash("create 'jsontask' --assignee alice --json")
    payload = json.loads(raw)
    expected = {"title": "jsontask", "assignee": "alice", "status": "ready"}
    for key, value in expected.items():
        assert payload[key] == value
def test_run_slash_dispatch_dry_run_counts(kanban_home):
    """``dispatch --dry-run`` reports a ``Spawned:`` count line."""
    for title, who in (("a", "alice"), ("b", "bob")):
        kc.run_slash(f"create '{title}' --assignee {who}")
    report = kc.run_slash("dispatch --dry-run")
    assert "Spawned:" in report
def test_run_slash_context_output_format(kanban_home):
    """``context`` output carries the task title, body and comment text."""
    import re

    created = kc.run_slash("create 'tech spec' --assignee alice --body 'write an RFC'")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)
    kc.run_slash(f"comment {task_id} 'remember to include performance section'")

    context = kc.run_slash(f"context {task_id}")
    for fragment in ("tech spec", "write an RFC", "performance section"):
        assert fragment in context
def test_run_slash_tenant_filter(kanban_home):
    """``list --tenant`` shows only the tasks belonging to that tenant."""
    kc.run_slash("create 'biz-a task' --tenant biz-a --assignee alice")
    kc.run_slash("create 'biz-b task' --tenant biz-b --assignee alice")

    only_a = kc.run_slash("list --tenant biz-a")
    only_b = kc.run_slash("list --tenant biz-b")

    assert "biz-a task" in only_a
    assert "biz-b task" not in only_a
    assert "biz-b task" in only_b
    assert "biz-a task" not in only_b
def test_run_slash_usage_error_returns_message(kanban_home):
    """``create`` without its required argument returns a usage/error message."""
    lowered = kc.run_slash("create").lower()
    assert any(word in lowered for word in ("usage", "error"))
def test_run_slash_assign_reassigns(kanban_home):
    """``assign`` changes the assignee that ``show`` reports."""
    import re

    created = kc.run_slash("create 'x' --assignee alice")
    task_id = re.search(r"(t_[a-f0-9]+)", created).group(1)

    assigned = kc.run_slash(f"assign {task_id} bob")
    assert "Assigned" in assigned
    assert "bob" in kc.run_slash(f"show {task_id}")
def test_run_slash_link_unlink(kanban_home):
    """``link`` demotes the dependent task to todo and ``unlink`` reports success."""
    import re

    first_out = kc.run_slash("create 'a'")
    second_out = kc.run_slash("create 'b'")
    id_pattern = r"(t_[a-f0-9]+)"
    upstream = re.search(id_pattern, first_out).group(1)
    downstream = re.search(id_pattern, second_out).group(1)

    assert "Linked" in kc.run_slash(f"link {upstream} {downstream}")
    # Once linked, the downstream task is shown as todo.
    assert "todo" in kc.run_slash(f"show {downstream}")
    assert "Unlinked" in kc.run_slash(f"unlink {upstream} {downstream}")
# ---------------------------------------------------------------------------
# Integration with the COMMAND_REGISTRY
# ---------------------------------------------------------------------------
def test_kanban_is_resolvable():
    """``resolve_command`` finds a command named ``kanban``."""
    from hermes_cli.commands import resolve_command

    resolved = resolve_command("kanban")
    assert resolved is not None
    assert resolved.name == "kanban"
def test_kanban_bypasses_active_session_guard():
    """``kanban`` is flagged as bypassing the active-session guard."""
    from hermes_cli.commands import should_bypass_active_session

    bypasses = should_bypass_active_session("kanban")
    assert bypasses
def test_kanban_in_autocomplete_table():
    """``/kanban`` and its core subcommands are registered for autocomplete."""
    from hermes_cli.commands import COMMANDS, SUBCOMMANDS

    assert "/kanban" in COMMANDS
    registered = SUBCOMMANDS.get("/kanban") or []
    for sub in ("create", "dispatch"):
        assert sub in registered
def test_kanban_not_gateway_only():
    """The kanban command is restricted to neither the CLI nor the gateway."""
    from hermes_cli.commands import COMMAND_REGISTRY

    # kanban must be available on BOTH the CLI and gateway surfaces.
    entry = next(c for c in COMMAND_REGISTRY if c.name == "kanban")
    assert not entry.cli_only
    assert not entry.gateway_only
# ---------------------------------------------------------------------------
# reclaim + reassign CLI smoke tests
# ---------------------------------------------------------------------------
def test_run_slash_reclaim_running_task(kanban_home):
    """``reclaim`` returns a task with a running claim back to ready."""
    import re
    import time
    import secrets
    from hermes_cli import kanban_db as kb

    created = kc.run_slash("create 'stuck worker task' --assignee broken-model")
    found = re.search(r"(t_[a-f0-9]+)", created)
    assert found
    tid = found.group(1)

    # Write a running claim straight into the DB: task row, a matching run
    # row, and the pointer from the task to that run.
    conn = kb.connect()
    try:
        lock = secrets.token_hex(4)
        conn.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (lock, int(time.time()) + 3600, 4242, tid),
        )
        conn.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (tid, lock, int(time.time()) + 3600, 4242, int(time.time())),
        )
        run_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (run_id, tid))
        conn.commit()
    finally:
        conn.close()

    reclaimed = kc.run_slash(f"reclaim {tid} --reason 'test'")
    assert "Reclaimed" in reclaimed, reclaimed

    # The task must be reported as ready again.
    shown = kc.run_slash(f"show {tid}")
    assert "ready" in shown.lower()
def test_run_slash_reassign_with_reclaim_flag(kanban_home):
    """``reassign --reclaim`` moves a task with a running claim to a new assignee."""
    import re
    import time
    import secrets
    from hermes_cli import kanban_db as kb

    created = kc.run_slash("create 'switch model' --assignee orig")
    found = re.search(r"(t_[a-f0-9]+)", created)
    tid = found.group(1)

    # Write a running claim straight into the DB: task row, a matching run
    # row, and the pointer from the task to that run.
    conn = kb.connect()
    try:
        lock = secrets.token_hex(4)
        conn.execute(
            "UPDATE tasks SET status='running', claim_lock=?, claim_expires=?, "
            "worker_pid=? WHERE id=?",
            (lock, int(time.time()) + 3600, 4242, tid),
        )
        conn.execute(
            "INSERT INTO task_runs (task_id, status, claim_lock, claim_expires, "
            "worker_pid, started_at) VALUES (?, 'running', ?, ?, ?, ?)",
            (tid, lock, int(time.time()) + 3600, 4242, int(time.time())),
        )
        run_id = conn.execute("SELECT last_insert_rowid()").fetchone()[0]
        conn.execute("UPDATE tasks SET current_run_id=? WHERE id=?", (run_id, tid))
        conn.commit()
    finally:
        conn.close()

    reassigned = kc.run_slash(f"reassign {tid} newbie --reclaim --reason 'switch'")
    assert "Reassigned" in reassigned, reassigned

    shown = kc.run_slash(f"show {tid}")
    assert "newbie" in shown
# ---------------------------------------------------------------------------
# /kanban specify — slash surface (same entry point CLI + gateway use)
# ---------------------------------------------------------------------------
def test_run_slash_specify_end_to_end(kanban_home, monkeypatch):
    """``/kanban specify`` promotes and retitles a triage task via ``run_slash``.

    ``run_slash`` is the entry point shared by the interactive CLI and the
    gateway platforms, so this single test covers both surfaces.
    """
    import re
    from unittest.mock import MagicMock

    # Start from a triage task created through the same slash surface.
    create_out = kc.run_slash("create 'rough idea' --triage")
    found = re.search(r"(t_[a-f0-9]+)", create_out)
    assert found, f"no task id in: {create_out!r}"
    tid = found.group(1)

    # Stub the auxiliary client so no real provider is contacted.
    choice = MagicMock()
    choice.message.content = (
        '{"title": "Spec: rough idea", "body": "**Goal**\\nShip it."}'
    )
    resp = MagicMock()
    resp.choices = [choice]
    fake_client = MagicMock()
    fake_client.chat.completions.create = MagicMock(return_value=resp)
    monkeypatch.setattr(
        "agent.auxiliary_client.get_text_auxiliary_client",
        lambda *a, **kw: (fake_client, "test-model"),
    )

    specified = kc.run_slash(f"specify {tid}")
    assert "Specified" in specified
    assert tid in specified

    # The task has left triage and carries the model-provided title.
    with kb.connect() as conn:
        task = kb.get_task(conn, tid)
    assert task.status in {"todo", "ready"}
    assert task.title == "Spec: rough idea"
def test_run_slash_specify_help_is_reachable(kanban_home):
    """`-h`/`--help` on a subcommand returns the actual help text — see
    issue #21794. argparse writes help to stdout and exits 0; run_slash
    must capture both streams and treat exit 0 as success, not error."""
    out = kc.run_slash("specify --help")
    assert "specify" in out.lower()
    # The help dump must NOT come back wrapped as a usage error. The previous
    # check, ``not out.startswith("")``, could never pass because every string
    # starts with the empty string; assert on the usage-error wrapper text
    # instead, as the unknown-action test does.
    assert "(usage error: " not in out
# ---------------------------------------------------------------------------
# /kanban help / no-args / unknown-action UX (issue #21794)
# ---------------------------------------------------------------------------
def test_run_slash_bare_returns_curated_help(kanban_home):
    """Bare `/kanban` yields the short curated help, not a raw argparse dump."""
    help_text = kc.run_slash("")
    for token in ("/kanban", "list", "show"):
        assert token in help_text
    # Sanity bound: chat-friendly size rather than the full usage tree.
    assert len(help_text) < 2000
    # argparse's usage-error sentinel must not leak into the help.
    assert "usage error" not in help_text.lower()
@pytest.mark.parametrize("alias", ["help", "--help", "-h", "?"])
def test_run_slash_help_aliases_match_bare(kanban_home, alias):
    """Each help alias returns exactly what the bare command returns."""
    reference = kc.run_slash("")
    via_alias = kc.run_slash(alias)
    assert via_alias == reference
def test_run_slash_subcommand_help_returns_help_text(kanban_home):
    """`/kanban show -h` returns the actual subcommand help, not a
    fake `(usage error: 0)` sentinel."""
    out = kc.run_slash("show -h")
    assert "task_id" in out
    assert "/kanban show" in out
    # The previous check, ``not out.startswith("")``, could never pass because
    # every string starts with the empty string. Assert directly on the
    # sentinel wrapper named in the docstring.
    assert "(usage error: " not in out
def test_run_slash_unknown_action_friendly_error(kanban_home):
    """An unknown subcommand yields a friendly usage error naming the action.

    The message must mention `/kanban` and the bad action, with no
    `(usage error: 2)` wrapping and no doubled `kanban kanban` prog string.
    """
    message = kc.run_slash("frobnicate")
    for expected in ("/kanban", "frobnicate"):
        assert expected in message
    for forbidden in ("/kanban-wrap", "/kanban kanban", "(usage error: "):
        assert forbidden not in message
def test_run_slash_missing_required_arg_friendly_error(kanban_home):
    """A missing positional shows usage scoped to the subcommand, not the
    top-level kanban tree."""
    message = kc.run_slash("show")
    for expected in ("/kanban show", "task_id"):
        assert expected in message
def test_run_slash_board_override_restores_prior_env(kanban_home, monkeypatch):
    """A per-call ``--board`` leaves a pre-set ``HERMES_KANBAN_BOARD`` unchanged."""
    for slug in ("alpha", "beta"):
        kb.create_board(slug)
    monkeypatch.setenv("HERMES_KANBAN_BOARD", "beta")

    kc.run_slash("--board alpha list")

    # The override must not outlive the call.
    board_after = os.environ.get("HERMES_KANBAN_BOARD")
    assert board_after == "beta"

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,557 @@
"""Tests for hermes_cli.kanban_diagnostics — rule-engine that produces
structured distress signals (diagnostics) for kanban tasks.
These tests exercise each rule in isolation using minimal in-memory
task/event/run fixtures (no DB) plus a few integration-style cases
that round-trip through the real kanban_db to make sure the rule
engine works on sqlite3.Row objects as well as dataclasses.
"""
from __future__ import annotations
import time
from pathlib import Path
import pytest
from hermes_cli import kanban_db as kb
from hermes_cli import kanban_diagnostics as kd
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated ``.hermes`` home with an initialised kanban DB.

    ``HERMES_HOME`` and ``Path.home`` are both redirected under ``tmp_path``
    before ``kb.init_db()`` runs. Returns the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    kb.init_db()
    return hermes_dir
def _task(**overrides):
base = {
"id": "t_demo00",
"title": "demo task",
"assignee": "demo",
"status": "ready",
"consecutive_failures": 0,
"last_failure_error": None,
}
base.update(overrides)
return base
def _event(kind, ts=None, **payload):
return {
"kind": kind,
"created_at": int(ts if ts is not None else time.time()),
"payload": payload or None,
}
def _run(outcome="completed", run_id=1, error=None):
return {
"id": run_id,
"outcome": outcome,
"error": error,
}
# ---------------------------------------------------------------------------
# Each rule — positive + negative + clearing
# ---------------------------------------------------------------------------
def test_hallucinated_cards_fires_on_blocked_event():
    """A blocked-hallucination event yields one error-level diagnostic."""
    blocked_event = _event(
        "completion_blocked_hallucination",
        ts=200,
        phantom_cards=["t_bad1", "t_bad2"],
        verified_cards=["t_good1"],
    )
    history = [_event("created", ts=100), blocked_event]
    # ``now=300`` keeps the synthetic timestamps in scope without tripping
    # the stranded_in_ready rule (100/200 would otherwise look decades old
    # relative to time.time()).
    diags = kd.compute_task_diagnostics(_task(status="ready"), history, [], now=300)

    matches = [d for d in diags if d.kind == "hallucinated_cards"]
    assert len(matches) == 1
    diag = matches[0]
    assert diag.severity == "error"
    assert diag.data["phantom_ids"] == ["t_bad1", "t_bad2"]
    # Both the comment action and the generic reassign action are offered.
    offered = [action.kind for action in diag.actions]
    assert "comment" in offered
    assert "reassign" in offered
def test_hallucinated_cards_clears_on_subsequent_completion():
    """A later ``completed`` event clears the hallucination diagnostic."""
    history = [
        _event("completion_blocked_hallucination", ts=100, phantom_cards=["t_x"]),
        _event("completed", ts=200, summary="retry worked"),
    ]
    result = kd.compute_task_diagnostics(_task(status="done"), history, [])
    assert result == []
def test_prose_phantom_refs_fires_after_clean_completion():
    """A phantom-reference event after completion yields one warning."""
    # In the DB path the prose scan emits its event AFTER the completed
    # event; only a subsequent clean completion clears it. The phantom id
    # must be valid hex because the scanner regex is ``t_[a-f0-9]{8,}``.
    history = [
        _event("completed", ts=100, summary="referenced t_bad", result_len=0),
        _event(
            "suspected_hallucinated_references",
            ts=101,
            phantom_refs=["t_deadbeef99"],
            source="completion_summary",
        ),
    ]
    diags = kd.compute_task_diagnostics(_task(status="done"), history, [])
    assert len(diags) == 1
    only = diags[0]
    assert only.kind == "prose_phantom_refs"
    assert only.severity == "warning"
    assert only.data["phantom_refs"] == ["t_deadbeef99"]
def test_prose_phantom_refs_clears_on_later_clean_edit():
    """An ``edited`` event after the phantom-reference event clears it."""
    history = [
        _event("completed", ts=100, summary="bad"),
        _event(
            "suspected_hallucinated_references",
            ts=101,
            phantom_refs=["t_ffff0000cc"],
        ),
        _event("edited", ts=200, fields=["result", "summary"]),
    ]
    result = kd.compute_task_diagnostics(_task(status="done"), history, [])
    assert result == []
def test_repeated_failures_fires_at_threshold_on_spawn():
    """Three spawn failures produce an error diagnostic that suggests ``doctor``."""
    failing = _task(
        status="ready",
        consecutive_failures=3,
        last_failure_error="Profile 'debugger' does not exist",
    )
    history = [_run(outcome="spawn_failed", run_id=n) for n in (1, 2, 3)]

    diags = kd.compute_task_diagnostics(failing, [], history)
    assert len(diags) == 1
    diag = diags[0]
    assert diag.kind == "repeated_failures"
    assert diag.severity == "error"
    # The suggested CLI hints are what operators act on here.
    hints = [action.label for action in diag.actions if action.suggested]
    assert any("doctor" in hint for hint in hints)
def test_repeated_failures_fires_on_timeout_loop():
    """Timeout loops raise ``repeated_failures`` too, with a log-checking hint."""
    failing = _task(
        status="ready",
        consecutive_failures=3,
        last_failure_error="elapsed 600s > limit 300s",
    )
    history = [_run(outcome="timed_out", run_id=n) for n in (1, 2, 3)]

    diags = kd.compute_task_diagnostics(failing, [], history)
    assert len(diags) == 1
    diag = diags[0]
    assert diag.kind == "repeated_failures"
    assert diag.data["most_recent_outcome"] == "timed_out"
    hints = [action.label for action in diag.actions if action.suggested]
    assert any("log" in hint.lower() for hint in hints)
def test_repeated_failures_escalates_to_critical():
    """Six consecutive failures are reported at critical severity."""
    failing = _task(consecutive_failures=6, last_failure_error="boom")
    first = kd.compute_task_diagnostics(failing, [], [])[0]
    assert first.severity == "critical"
def test_repeated_failures_below_threshold_silent():
    """Two consecutive failures produce no diagnostics."""
    result = kd.compute_task_diagnostics(_task(consecutive_failures=2), [], [])
    assert result == []
def test_repeated_crashes_counts_trailing_streak_only():
    """Only the crashes at the end of the run history are counted."""
    history = [
        _run(outcome="completed", run_id=1),
        _run(outcome="crashed", run_id=2, error="OOM"),
        _run(outcome="crashed", run_id=3, error="OOM again"),
    ]
    diags = kd.compute_task_diagnostics(
        _task(status="ready", assignee="crashy"), [], history,
    )
    assert len(diags) == 1
    diag = diags[0]
    assert diag.kind == "repeated_crashes"
    # Two trailing crashes meet the default threshold of 2 -> error severity.
    assert diag.severity == "error"
    assert diag.data["consecutive_crashes"] == 2
def test_repeated_crashes_breaks_on_recent_success():
    """A completed run after earlier crashes produces no diagnostics."""
    history = [
        _run(outcome="crashed", run_id=1),
        _run(outcome="crashed", run_id=2),
        _run(outcome="completed", run_id=3),
    ]
    recovered = _task(status="ready", assignee="fixed")
    assert kd.compute_task_diagnostics(recovered, [], history) == []
def test_repeated_crashes_escalates_on_many_crashes():
    """Five crashes in a row are reported at critical severity."""
    history = []
    for run_id in range(1, 6):  # five consecutive crashes
        history.append(_run(outcome="crashed", run_id=run_id))
    diags = kd.compute_task_diagnostics(_task(status="ready", assignee="x"), [], history)
    assert diags[0].severity == "critical"
def test_stuck_in_blocked_fires_past_threshold():
    """A task blocked 48 hours ago yields a ``stuck_in_blocked`` warning."""
    now = int(time.time())
    blocked_at = now - 3600 * 48
    history = [_event("blocked", ts=blocked_at, reason="needs approval")]

    diags = kd.compute_task_diagnostics(_task(status="blocked"), history, [], now=now)
    assert len(diags) == 1
    diag = diags[0]
    assert diag.kind == "stuck_in_blocked"
    assert diag.severity == "warning"
    assert diag.data["age_hours"] >= 48
def test_stuck_in_blocked_silent_with_recent_comment():
    """A comment two hours ago keeps a long-blocked task from being flagged."""
    now = int(time.time())
    hour = 3600
    history = [
        _event("blocked", ts=now - hour * 48),
        _event("commented", ts=now - hour * 2, author="human"),
    ]
    result = kd.compute_task_diagnostics(_task(status="blocked"), history, [], now=now)
    assert result == []
def test_stuck_in_blocked_silent_when_not_blocked():
    """An old ``blocked`` event is ignored once the task is no longer blocked."""
    history = [_event("blocked", ts=1000)]
    result = kd.compute_task_diagnostics(_task(status="ready"), history, [], now=9999999)
    assert result == []
def test_repeated_crashes_surfaces_actual_error_in_title():
    """The crash error text appears in both the title and the detail.

    Operators should see WHAT broke (rate limit, auth, OOM, ...) without
    having to open the logs.
    """
    message = "openai: 429 Too Many Requests"
    history = [
        _run(outcome="crashed", run_id=1, error=message),
        _run(outcome="crashed", run_id=2, error=message),
    ]
    diags = kd.compute_task_diagnostics(_task(status="ready", assignee="x"), [], history)
    assert len(diags) == 1
    diag = diags[0]
    for fragment in ("429", "Too Many Requests"):
        assert fragment in diag.title
    # The detail carries the full error.
    assert "429 Too Many Requests" in diag.detail
def test_repeated_crashes_no_error_fallback_title():
    """Crashes without an error string fall back to a "no error recorded" title."""
    history = [_run(outcome="crashed", run_id=n, error=None) for n in (1, 2)]
    diags = kd.compute_task_diagnostics(_task(status="ready", assignee="x"), [], history)
    title = diags[0].title
    assert "no error recorded" in title
def test_repeated_failures_surfaces_actual_error_in_title():
    """The last failure error is surfaced in the title and in the detail."""
    failing = _task(
        consecutive_failures=5,
        last_failure_error="insufficient_quota: billing limit reached",
    )
    diags = kd.compute_task_diagnostics(failing, [], [])
    assert len(diags) == 1
    diag = diags[0]
    assert any(part in diag.title for part in ("insufficient_quota", "billing limit"))
    assert "insufficient_quota" in diag.detail
def test_repeated_crashes_truncates_huge_tracebacks():
    """Full Python tracebacks can be tens of KB. The title stays one
    line (160 chars); the detail caps at 500 chars + ellipsis so the
    card doesn't explode visually."""
    huge = "Traceback (most recent call last):\n" + (" File\n" * 500)
    task = _task(status="ready")
    runs = [
        _run(outcome="crashed", run_id=1, error=huge),
        _run(outcome="crashed", run_id=2, error=huge),
    ]
    diags = kd.compute_task_diagnostics(task, [], runs)
    d = diags[0]
    # Title is only the first line, capped.
    assert "\n" not in d.title
    assert len(d.title) < 250
    # Detail is either visibly truncated or short. The previous check used
    # ``endswith("")``, which is true for every string and made the whole
    # assertion vacuous. Both ellipsis spellings are accepted because the
    # exact truncation marker is not visible from this test module.
    assert d.detail.endswith(("…", "...")) or len(d.detail) < 700
# ---------------------------------------------------------------------------
# Severity sorting
# ---------------------------------------------------------------------------
def test_diagnostics_sorted_critical_first():
    """With a critical and a warning diagnostic, the critical one is listed first."""
    failing = _task(
        status="done",
        consecutive_failures=10,
        last_failure_error="nope",
    )
    history = [
        _event("completed", ts=100, summary="referenced t_missing"),
        _event(
            "suspected_hallucinated_references",
            ts=101,
            phantom_refs=["t_missing11"],
        ),
    ]
    ordered = [d.kind for d in kd.compute_task_diagnostics(failing, history, [])]
    assert ordered[0] == "repeated_failures"  # the critical one
    assert "prose_phantom_refs" in ordered
# ---------------------------------------------------------------------------
# Integration — runs through real kanban_db so sqlite.Row fields work
# ---------------------------------------------------------------------------
def test_engine_works_on_sqlite_row_objects(kanban_home):
    """Regression: rules must accept rows fetched straight from the database.

    The task, events and runs are read with plain ``SELECT *`` queries and
    passed to the engine unconverted, the way the API layer does.
    """
    conn = kb.connect()
    try:
        parent = kb.create_task(conn, title="p", assignee="w")
        real = kb.create_task(conn, title="r", assignee="x", created_by="w")
        # Completing with one real and one phantom card must be rejected.
        with pytest.raises(kb.HallucinatedCardsError):
            kb.complete_task(
                conn,
                parent,
                summary="with phantom",
                created_cards=[real, "t_deadbeef1"],
            )

        # Fetch raw rows for the parent task and its history.
        task_row = conn.execute(
            "SELECT * FROM tasks WHERE id = ?", (parent,),
        ).fetchone()
        event_rows = list(conn.execute(
            "SELECT * FROM task_events WHERE task_id = ? ORDER BY id",
            (parent,),
        ).fetchall())
        run_rows = list(conn.execute(
            "SELECT * FROM task_runs WHERE task_id = ? ORDER BY id",
            (parent,),
        ).fetchall())

        diags = kd.compute_task_diagnostics(task_row, event_rows, run_rows)
        assert len(diags) == 1
        only = diags[0]
        assert only.kind == "hallucinated_cards"
        assert "t_deadbeef1" in only.data["phantom_ids"]
    finally:
        conn.close()
# ---------------------------------------------------------------------------
# Error-tolerance: a broken rule shouldn't 500 the whole compute call
# ---------------------------------------------------------------------------
def test_broken_rule_is_isolated(monkeypatch):
    """A rule that raises does not stop the remaining rules from reporting."""

    def _bad_rule(task, events, runs, now, cfg):
        raise RuntimeError("synthetic rule bug")

    # Put the broken rule at the front of the registry so every real rule
    # runs after it.
    patched_rules = [_bad_rule] + kd._RULES
    monkeypatch.setattr(kd, "_RULES", patched_rules)

    failing = _task(consecutive_failures=5, last_failure_error="e")
    reported = [d.kind for d in kd.compute_task_diagnostics(failing, [], [])]
    # The broken rule contributes nothing; the real one still fires.
    assert "repeated_failures" in reported
# ---------------------------------------------------------------------------
# stranded_in_ready
#
# Surfaces ready tasks that nobody has claimed within the threshold.
# Identity-agnostic by design: catches typo'd assignees, deleted profiles,
# down external worker pools, and misconfigured dispatchers in one rule.
# ---------------------------------------------------------------------------
def test_stranded_in_ready_fires_when_age_exceeds_threshold():
    """A ready task unclaimed for 45 min (default threshold 30 min) is a warning."""
    now = 100_000
    age = 45 * 60  # 2700s, above the 1800s default threshold
    waiting = _task(status="ready", assignee="demo", claim_lock=None)
    history = [_event("created", ts=now - age)]

    diags = kd.compute_task_diagnostics(waiting, history, [], now=now)
    stranded = [d for d in diags if d.kind == "stranded_in_ready"]
    assert len(stranded) == 1
    hit = stranded[0]
    assert hit.severity == "warning"
    assert hit.data["age_seconds"] == 45 * 60
    assert hit.data["assignee"] == "demo"
def test_stranded_in_ready_silent_below_threshold():
    """A ready task that is only 10 minutes old is not flagged."""
    now = 100_000
    fresh = _task(status="ready", assignee="demo", claim_lock=None)
    history = [_event("created", ts=now - 10 * 60)]
    diags = kd.compute_task_diagnostics(fresh, history, [], now=now)
    stranded = [d for d in diags if d.kind == "stranded_in_ready"]
    assert stranded == []
def test_stranded_in_ready_skips_non_ready_status():
    """Only ``ready`` tasks are in scope; every other status is left alone."""
    now = 100_000
    six_hours_ago = now - 6 * 3600
    for status in ("running", "blocked", "done", "todo", "triage"):
        candidate = _task(status=status, assignee="demo")
        history = [_event("created", ts=six_hours_ago)]
        diags = kd.compute_task_diagnostics(candidate, history, [], now=now)
        stranded = [d for d in diags if d.kind == "stranded_in_ready"]
        assert stranded == [], status
def test_stranded_in_ready_skips_unassigned_tasks():
    """An empty assignee is not flagged here.

    The dispatcher already reports such tasks as `skipped_unassigned`, so
    this rule avoids double-flagging them.
    """
    now = 100_000
    unowned = _task(status="ready", assignee="", claim_lock=None)
    history = [_event("created", ts=now - 6 * 3600)]
    diags = kd.compute_task_diagnostics(unowned, history, [], now=now)
    stranded = [d for d in diags if d.kind == "stranded_in_ready"]
    assert stranded == []
def test_stranded_in_ready_skips_claimed_tasks():
    """A task holding a ``claim_lock`` is never reported as stranded.

    Even an old claim is left to the run-level liveness signal to judge.
    """
    now = 100_000
    claimed = _task(status="ready", assignee="demo", claim_lock="run_xyz")
    history = [_event("created", ts=now - 6 * 3600)]
    diags = kd.compute_task_diagnostics(claimed, history, [], now=now)
    stranded = [d for d in diags if d.kind == "stranded_in_ready"]
    assert stranded == []
def test_stranded_in_ready_uses_latest_ready_transition():
    """Age is measured from the most recent ready-transition event.

    A task reclaimed 20 minutes ago is not stranded, even though it was
    first created six hours ago.
    """
    now = 100_000
    history = [
        _event("created", ts=now - 6 * 3600),   # six hours ago
        _event("reclaimed", ts=now - 20 * 60),  # twenty minutes ago; this one counts
    ]
    diags = kd.compute_task_diagnostics(
        _task(status="ready", assignee="demo"), history, [], now=now,
    )
    stranded = [d for d in diags if d.kind == "stranded_in_ready"]
    assert stranded == []
def test_stranded_in_ready_severity_escalates_with_age():
    """Severity of ``stranded_in_ready`` grows with the task's age.

    The ages are multiples of the default threshold, which the original
    comment gives as 1800 s: 1.5x, 3x and 8x are expected to map to
    ``warning``, ``error`` and ``critical`` respectively.
    """
    now = 100_000
    task = _task(status="ready", assignee="demo")
    expected_by_age = {
        45 * 60: "warning",    # 1.5x threshold
        90 * 60: "error",      # 3x threshold
        4 * 3600: "critical",  # 8x threshold
    }
    for age, expected in expected_by_age.items():
        history = [_event("created", ts=now - age)]
        result = kd.compute_task_diagnostics(task, history, [], now=now)
        stranded = [d for d in result if d.kind == "stranded_in_ready"]
        assert len(stranded) == 1, f"age={age}"
        actual = stranded[0].severity
        assert actual == expected, (
            f"age={age} expected {expected}, got {stranded[0].severity}"
        )
def test_stranded_in_ready_respects_config_override():
    """``stranded_threshold_seconds`` in ``config`` overrides the default threshold.

    A ten-minute-old ready task is silent with no config, and is flagged
    once the threshold is lowered to five minutes.
    """
    now = 100_000
    task = _task(status="ready", assignee="demo")
    history = [_event("created", ts=now - 10 * 60)]  # ten minutes old

    def _stranded(diags):
        return [d for d in diags if d.kind == "stranded_in_ready"]

    # No override: ten minutes is below the default, nothing fires.
    default_run = kd.compute_task_diagnostics(task, history, [], now=now)
    assert _stranded(default_run) == []

    # Five-minute threshold: the same task is now over the limit.
    override = {"stranded_threshold_seconds": 5 * 60}
    lowered_run = kd.compute_task_diagnostics(
        task, history, [], now=now, config=override,
    )
    assert len(_stranded(lowered_run)) == 1
def test_stranded_in_ready_falls_back_to_created_at():
    """Without a ready-transition event the rule ages from ``created_at``.

    The only event supplied is a ``commented`` one, so the reported
    ``age_seconds`` must equal the four hours since the task's
    ``created_at``.
    """
    now = 100_000
    four_hours = 4 * 3600
    task = _task(status="ready", assignee="demo", created_at=now - four_hours)
    # A comment is not a ready transition, so no event qualifies.
    unrelated_events = [_event("commented", ts=now - 100)]
    result = kd.compute_task_diagnostics(task, unrelated_events, [], now=now)
    stranded = [d for d in result if d.kind == "stranded_in_ready"]
    assert len(stranded) == 1
    assert stranded[0].data["age_seconds"] == 4 * 3600
def test_stranded_in_ready_works_on_real_db_row(kanban_home):
    """Run the stranded rule against rows fetched through ``kb.connect()``.

    The task and its events are read back from the database rather than
    built with the dict helpers, so the rule is exercised on whatever row
    objects the connection returns. Both the task's ``created_at`` and its
    events' ``created_at`` are pushed 90 minutes into the past so the age
    calculation lines up.
    """
    import time as _t

    conn = kb.connect()
    try:
        # Create a task and force its created_at into the past.
        tid = kb.create_task(conn, title="stranded one", assignee="ghost")
        old_ts = int(_t.time()) - 90 * 60  # 90 min old
        conn.execute(
            "UPDATE tasks SET status = 'ready', created_at = ? WHERE id = ?",
            (old_ts, tid),
        )
        conn.commit()
        task_row = conn.execute(
            "SELECT * FROM tasks WHERE id = ?", (tid,)
        ).fetchone()
        # Back-date the events *before* reading them. The earlier version
        # also fetched the events once ahead of this UPDATE and then threw
        # that result away; the redundant query is gone.
        conn.execute(
            "UPDATE task_events SET created_at = ? WHERE task_id = ?",
            (old_ts, tid),
        )
        conn.commit()
        events = list(conn.execute(
            "SELECT * FROM task_events WHERE task_id = ?", (tid,),
        ).fetchall())
        diags = kd.compute_task_diagnostics(task_row, events, [])
        stranded = [d for d in diags if d.kind == "stranded_in_ready"]
        assert len(stranded) == 1
        assert stranded[0].data["assignee"] == "ghost"
    finally:
        conn.close()

View file

@ -0,0 +1,481 @@
import asyncio
import pytest
from pathlib import Path
from types import SimpleNamespace
from hermes_cli import kanban_db as kb
from unittest.mock import AsyncMock, MagicMock, patch
# ---------------------------------------------------------------------------
# Fixtures
# ---------------------------------------------------------------------------
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated Hermes home with an initialised kanban DB.

    Creates ``<tmp_path>/.hermes``, points ``HERMES_HOME`` at it,
    redirects ``Path.home()`` to ``tmp_path``, runs ``kb.init_db()`` and
    returns the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    kb.init_db()
    return hermes_dir
@pytest.mark.asyncio
async def test_notifier_unsubs_after_completed_event(kanban_home):
    """A completed task is announced once and its subscription is removed.

    The watcher runs against a task that was subscribed and then
    completed; the fake adapter stops the watcher on the first send.
    """
    import hermes_cli.kanban_db as kb
    from gateway.run import GatewayRunner
    from gateway.config import Platform

    conn = kb.connect()
    try:
        tid = kb.create_task(conn, title="test task", assignee="worker1")
        kb.add_notify_sub(conn, task_id=tid, platform="telegram", chat_id="chat1")
        kb.complete_task(conn, tid, result="completed by agent")
    finally:
        conn.close()

    # Build a runner without running GatewayRunner.__init__, then set only
    # the attributes this test assigns.
    runner = object.__new__(GatewayRunner)
    runner._running = True
    runner._kanban_sub_fail_counts = {}

    async def _stop_after_send(chat_id, msg, metadata=None):
        runner._running = False

    adapter = MagicMock()
    adapter.send = AsyncMock(side_effect=_stop_after_send)
    runner.adapters = {Platform.TELEGRAM: adapter}

    real_sleep = asyncio.sleep

    async def _yield_only(_seconds):
        # Replace the watcher's sleep with a zero-length yield.
        await real_sleep(0)

    with patch("gateway.run.asyncio.sleep", side_effect=_yield_only):
        watcher = runner._kanban_notifier_watcher(interval=1)
        await asyncio.wait_for(watcher, timeout=10.0)

    adapter.send.assert_called_once()
    sent_text = adapter.send.call_args[0][1]
    assert "completed" in sent_text

    conn = kb.connect()
    try:
        remaining = kb.list_notify_subs(conn, tid)
    finally:
        conn.close()
    assert remaining == [], "Subscription should be unsub after completed event"
@pytest.mark.asyncio
@pytest.mark.parametrize('kind', ["gave_up", "crashed", "timed_out"])
async def test_notifier_unsubs_after_abnormal_events(kind, kanban_home):
    """Abnormal events notify the user but keep the subscription alive.

    Despite the test's name, the assertions require the subscription to
    *survive* ``gave_up`` / ``crashed`` / ``timed_out``: exactly one
    message is sent, one subscription row remains, and its
    ``last_event_id`` cursor is at least 1. The original author's
    rationale is that the dispatcher may respawn the task and emit the same
    kind again, and that subscriptions are dropped only on a final status
    (see the TERMINAL_KINDS comment in gateway/run.py and PR #21398).
    """
    import hermes_cli.kanban_db as kb
    from gateway.run import GatewayRunner
    from gateway.config import Platform

    conn = kb.connect()
    try:
        tid = kb.create_task(conn, title=f"test {kind} task", assignee="worker1")
        kb.add_notify_sub(conn, task_id=tid, platform="telegram", chat_id="chat1")
        kb._append_event(conn, tid, kind=kind)
    finally:
        conn.close()

    runner = object.__new__(GatewayRunner)
    runner._running = True
    runner._kanban_sub_fail_counts = {}

    async def _stop_after_send(chat_id, msg, metadata=None):
        runner._running = False

    adapter = MagicMock()
    adapter.send = AsyncMock(side_effect=_stop_after_send)
    runner.adapters = {Platform.TELEGRAM: adapter}

    real_sleep = asyncio.sleep

    async def _yield_only(_seconds):
        await real_sleep(0)

    with patch("gateway.run.asyncio.sleep", side_effect=_yield_only):
        watcher = runner._kanban_notifier_watcher(interval=1)
        await asyncio.wait_for(watcher, timeout=10.0)

    # The abnormal event reaches the user, worded with spaces for underscores.
    adapter.send.assert_called_once()
    human_kind = kind.replace('_', ' ')
    assert human_kind in adapter.send.call_args[0][1]

    # The subscription must still be there, with its cursor moved forward.
    conn = kb.connect()
    try:
        subs = kb.list_notify_subs(conn, tid)
    finally:
        conn.close()
    assert len(subs) == 1, (
        f"Subscription should survive {kind!r} so the next cycle of the "
        f"same event reaches the user; got {subs!r}"
    )
    cursor = int(subs[0]["last_event_id"])
    assert cursor >= 1, (
        "Cursor should have advanced past the delivered event "
        "(claim_unseen_events_for_sub advances atomically inside the "
        "same write txn as the read)."
    )
@pytest.mark.asyncio
async def test_notifier_second_blocked_delivers(kanban_home):
    """A second ``blocked`` event is delivered after the first one was.

    The watcher is run twice: once after the task is first blocked, and
    again after it is unblocked and blocked a second time. Each watcher run
    stops itself after six patched sleeps. Exactly two messages containing
    "blocked" are expected, and only the second may mention
    "second block".
    """
    import hermes_cli.kanban_db as kb
    from gateway.run import GatewayRunner
    from gateway.config import Platform

    runner = object.__new__(GatewayRunner)
    runner._running = True
    runner._kanban_sub_fail_counts = {}

    delivered_msgs: list[str] = []

    async def _capture_send(chat_id, msg, metadata=None):
        delivered_msgs.append(msg)

    fake_adapter = MagicMock()
    fake_adapter.send = AsyncMock(side_effect=_capture_send)
    runner.adapters = {Platform.TELEGRAM: fake_adapter}

    _orig_sleep = asyncio.sleep
    tick_count = 0

    async def _fast_sleep(_):
        # Yield without waiting, and stop the watcher after six ticks.
        nonlocal tick_count
        await _orig_sleep(0)
        tick_count += 1
        if tick_count >= 6:
            runner._running = False

    conn = kb.connect()
    try:
        tid = kb.create_task(conn, title="test task", assignee="worker1")
        kb.add_notify_sub(conn, task_id=tid, platform="telegram", chat_id="chat1")
        # Cycle 1: blocked
        kb.block_task(conn, tid, reason="first block")
    finally:
        conn.close()

    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
        await asyncio.wait_for(
            runner._kanban_notifier_watcher(interval=1),
            timeout=10.0,
        )

    # Cycle 2: unblock, block again, and run the watcher a second time.
    runner._running = True
    tick_count = 0
    conn = kb.connect()
    try:
        kb.unblock_task(conn, tid)
        kb.block_task(conn, tid, reason="second block")
    finally:
        conn.close()

    with patch("gateway.run.asyncio.sleep", side_effect=_fast_sleep):
        await asyncio.wait_for(
            runner._kanban_notifier_watcher(interval=1),
            timeout=10.0,
        )

    blocked_deliveries = [m for m in delivered_msgs if "blocked" in m]
    # Check the count first: indexing before this would turn a missing
    # delivery into a bare IndexError and hide the message below.
    assert len(blocked_deliveries) == 2, (
        f"Should receive 2 blocked notification, but only get {len(blocked_deliveries)} count\n"
        f"Message {delivered_msgs}"
    )
    assert "second block" not in blocked_deliveries[0]
    assert "second block" in blocked_deliveries[1]
# ---------------------------------------------------------------------------
# Regression: gateway watchers must not double-init the kanban DB.
#
# Both the notifier watcher (`_kanban_notifier_watcher`) and the dispatcher
# tick (`_tick_once_for_board`) used to call `_kb.connect(board=slug)`
# immediately followed by `_kb.init_db(board=slug)`. Since `connect()`
# already runs the schema + idempotent migration on first open per process,
# the explicit `init_db()` was redundant — and worse, `init_db()`
# deliberately busts the per-process cache and re-runs the migration on a
# *second* connection, which races the first. On legacy DBs this surfaced
# as `duplicate column name: <col>` (now tolerated by
# `_add_column_if_missing`) and intermittent `database is locked` errors
# (issue #21378).
#
# The fix removes the `init_db()` calls in both watchers; this regression
# test pins that behaviour so we don't reintroduce them.
# ---------------------------------------------------------------------------
@pytest.mark.asyncio
async def test_notifier_does_not_call_init_db(kanban_home):
    """The notifier watcher must never call ``init_db`` (issue #21378).

    ``hermes_cli.kanban_db.init_db`` is replaced by a recording wrapper
    while the watcher runs for three patched sleeps; the recorded call list
    must stay empty.
    """
    import hermes_cli.kanban_db as kb
    from gateway.run import GatewayRunner
    from gateway.config import Platform

    runner = object.__new__(GatewayRunner)
    runner._running = True
    runner._kanban_sub_fail_counts = {}

    adapter = MagicMock()
    adapter.send = AsyncMock()
    runner.adapters = {Platform.TELEGRAM: adapter}

    real_sleep = asyncio.sleep
    tick_count = 0

    async def _counting_sleep(_seconds):
        # Yield without waiting; stop the watcher on the third tick.
        nonlocal tick_count
        await real_sleep(0)
        tick_count += 1
        if tick_count >= 3:
            runner._running = False

    init_db_calls: list[object] = []
    real_init_db = kb.init_db

    def _recording_init_db(*args, **kwargs):
        # Record the call, then delegate to the real function.
        init_db_calls.append((args, kwargs))
        return real_init_db(*args, **kwargs)

    sleep_patch = patch("gateway.run.asyncio.sleep", side_effect=_counting_sleep)
    init_db_patch = patch("hermes_cli.kanban_db.init_db", side_effect=_recording_init_db)
    with sleep_patch:
        with init_db_patch:
            watcher = runner._kanban_notifier_watcher(interval=1)
            await asyncio.wait_for(watcher, timeout=10.0)

    assert init_db_calls == [], (
        "_kanban_notifier_watcher must not call init_db on every tick — "
        "connect() handles first-run schema init. "
        "Reintroducing init_db revives issue #21378. "
        f"Got {len(init_db_calls)} call(s): {init_db_calls}"
    )
def test_dispatcher_tick_does_not_call_init_db(kanban_home, monkeypatch):
    """The gateway watchers' source must not contain ``_kb.init_db(board=slug)``.

    This is a source-level guard for issue #21378. According to the
    original note, the dispatcher tick is a local closure inside
    ``_kanban_dispatcher_watcher`` and cannot be called directly, so the
    test inspects the source text of both watcher methods instead of
    running them.

    The previous version also built a runner and an ``init_db`` spy that
    were never used; that dead setup has been removed. The fixture
    parameters are kept so the test's signature is unchanged.
    """
    import inspect

    from gateway.run import GatewayRunner

    src = inspect.getsource(GatewayRunner._kanban_dispatcher_watcher)
    assert "_kb.init_db(board=slug)" not in src, (
        "_kanban_dispatcher_watcher must not call _kb.init_db(board=slug) — "
        "see issue #21378. Use connect() alone; it runs migrations on first "
        "open per process."
    )

    notifier_src = inspect.getsource(GatewayRunner._kanban_notifier_watcher)
    assert "_kb.init_db(board=slug)" not in notifier_src, (
        "_kanban_notifier_watcher must not call _kb.init_db(board=slug) — "
        "see issue #21378."
    )
@pytest.mark.asyncio
async def test_notifier_skips_subscription_owned_by_other_profile(kanban_home):
    """A watcher ignores subscriptions registered under another profile.

    The subscription is created with ``notifier_profile="default"`` while
    the runner identifies as ``"business-partner"``. Nothing may be sent,
    the subscription must remain, and its ``last_event_id`` must still be 0.
    """
    import hermes_cli.kanban_db as kb
    from gateway.run import GatewayRunner
    from gateway.config import Platform

    conn = kb.connect()
    try:
        tid = kb.create_task(conn, title="owned task", assignee="backend-engineer")
        kb.add_notify_sub(
            conn,
            task_id=tid,
            platform="telegram",
            chat_id="chat1",
            notifier_profile="default",
        )
        kb.complete_task(conn, tid, result="done")
    finally:
        conn.close()

    runner = object.__new__(GatewayRunner)
    runner._running = True
    runner._kanban_sub_fail_counts = {}
    runner._kanban_notifier_profile = "business-partner"

    adapter = MagicMock()
    adapter.send = AsyncMock()
    runner.adapters = {Platform.TELEGRAM: adapter}

    real_sleep = asyncio.sleep
    tick_count = 0

    async def _counting_sleep(_seconds):
        # Yield without waiting; stop the watcher on the third tick.
        nonlocal tick_count
        await real_sleep(0)
        tick_count += 1
        if tick_count >= 3:
            runner._running = False

    with patch("gateway.run.asyncio.sleep", side_effect=_counting_sleep):
        watcher = runner._kanban_notifier_watcher(interval=1)
        await asyncio.wait_for(watcher, timeout=10.0)

    adapter.send.assert_not_called()

    conn = kb.connect()
    try:
        subs = kb.list_notify_subs(conn, tid)
    finally:
        conn.close()
    assert len(subs) == 1
    cursor = int(subs[0]["last_event_id"])
    assert cursor == 0, "wrong profile must not claim the event"
@pytest.mark.asyncio
async def test_notifier_delivers_subscription_owned_by_current_profile(kanban_home):
    """A watcher delivers events for subscriptions under its own profile.

    Subscription and runner both use the ``"default"`` profile, so the
    completion is sent exactly once and the subscription is removed
    afterwards.
    """
    import hermes_cli.kanban_db as kb
    from gateway.run import GatewayRunner
    from gateway.config import Platform

    conn = kb.connect()
    try:
        tid = kb.create_task(conn, title="owned task", assignee="backend-engineer")
        kb.add_notify_sub(
            conn,
            task_id=tid,
            platform="telegram",
            chat_id="chat1",
            notifier_profile="default",
        )
        kb.complete_task(conn, tid, result="done")
    finally:
        conn.close()

    runner = object.__new__(GatewayRunner)
    runner._running = True
    runner._kanban_sub_fail_counts = {}
    runner._kanban_notifier_profile = "default"

    async def _stop_after_send(chat_id, msg, metadata=None):
        runner._running = False

    adapter = MagicMock()
    adapter.send = AsyncMock(side_effect=_stop_after_send)
    runner.adapters = {Platform.TELEGRAM: adapter}

    real_sleep = asyncio.sleep

    async def _yield_only(_seconds):
        await real_sleep(0)

    with patch("gateway.run.asyncio.sleep", side_effect=_yield_only):
        watcher = runner._kanban_notifier_watcher(interval=1)
        await asyncio.wait_for(watcher, timeout=10.0)

    adapter.send.assert_called_once()

    conn = kb.connect()
    try:
        remaining = kb.list_notify_subs(conn, tid)
    finally:
        conn.close()
    assert remaining == []
@pytest.mark.asyncio
async def test_gateway_create_autosubscribes_on_explicit_board(kanban_home):
    """``/kanban --board <slug> create ...`` subscribes on the named board.

    The ``--board`` flag is placed before the ``create`` subcommand. The
    task and its subscription must land in the ``projx`` board, carrying
    the chat and thread ids of the originating event, while the
    ``default`` board receives no subscription.
    """
    from gateway.run import GatewayRunner
    from gateway.config import Platform

    kb.create_board("projx")
    runner = object.__new__(GatewayRunner)

    origin = SimpleNamespace(
        platform=Platform.TELEGRAM,
        chat_id="chat1",
        thread_id="th1",
        user_id="u1",
    )
    command = '/kanban --board projx create "hello" --assignee alice'
    event = SimpleNamespace(text=command, source=origin)

    reply = await GatewayRunner._handle_kanban_command(runner, event)
    assert "subscribed" in reply.lower()

    # The explicit board holds both the task and the subscription.
    conn = kb.connect(board="projx")
    try:
        subs = kb.list_notify_subs(conn)
        tasks = kb.list_tasks(conn)
    finally:
        conn.close()
    titles = [t.title for t in tasks]
    assert titles == ["hello"]
    assert len(subs) == 1
    only_sub = subs[0]
    assert only_sub["chat_id"] == "chat1"
    assert only_sub["thread_id"] == "th1"

    # The default board must be untouched.
    conn = kb.connect(board="default")
    try:
        assert kb.list_notify_subs(conn) == []
    finally:
        conn.close()

View file

@ -0,0 +1,337 @@
"""Tests for the specifier module + `hermes kanban specify` CLI surface.
The auxiliary LLM client is mocked — these tests don't hit any network or
real provider. They exercise the prompt plumbing, response parsing, DB
writes, and CLI flag surface.
"""
from __future__ import annotations
import argparse
import json as jsonlib
from pathlib import Path
from unittest.mock import MagicMock, patch
import pytest
from hermes_cli import kanban as kanban_cli
from hermes_cli import kanban_db as kb
from hermes_cli import kanban_specify as spec
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated Hermes home with an initialised kanban DB.

    Creates ``<tmp_path>/.hermes``, points ``HERMES_HOME`` at it,
    redirects ``Path.home()`` to ``tmp_path``, runs ``kb.init_db()`` and
    returns the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    kb.init_db()
    return hermes_dir
def _fake_aux_response(content: str):
"""Build a minimal object shaped like an OpenAI chat.completions result.
The specifier only reads ``resp.choices[0].message.content``, so we
avoid importing the openai SDK and build the tree with MagicMock.
"""
resp = MagicMock()
resp.choices = [MagicMock()]
resp.choices[0].message.content = content
return resp
def _mock_client_returning(content: str):
    """Return a mock client whose ``chat.completions.create`` yields *content*.

    Every call to ``create`` returns the same fake response built by
    ``_fake_aux_response``.
    """
    canned = _fake_aux_response(content)
    client = MagicMock()
    client.chat.completions.create = MagicMock(return_value=canned)
    return client
def _patch_aux_client(content: str, *, model: str = "test-model"):
    """Build a patcher that swaps in a mock auxiliary LLM client.

    Returns a ``(patcher, client)`` pair. The patcher is an unstarted
    ``unittest.mock.patch`` targeting
    ``agent.auxiliary_client.get_text_auxiliary_client`` so that it returns
    ``(client, model)``; use it as a context manager. ``client`` is the
    mock whose ``chat.completions.create`` yields *content*, handed back so
    callers can inspect how it was called.

    NOTE(review): only this single target is patched. The original note
    says kanban_specify imports the function lazily inside
    ``specify_task``, which would make patching at the source module
    sufficient — confirm against kanban_specify.
    """
    client = _mock_client_returning(content)
    patcher = patch(
        "agent.auxiliary_client.get_text_auxiliary_client",
        return_value=(client, model),
    )
    return patcher, client
# ---------------------------------------------------------------------------
# JSON extraction helpers
# ---------------------------------------------------------------------------
def test_extract_json_blob_handles_plain_json():
    """A bare JSON object string is parsed into the matching dict."""
    expected = {"title": "T", "body": "B"}
    parsed = spec._extract_json_blob('{"title": "T", "body": "B"}')
    assert parsed == expected
def test_extract_json_blob_handles_fenced_json():
    """JSON wrapped in a ```json fenced block is still extracted."""
    expected = {"title": "T", "body": "B"}
    parsed = spec._extract_json_blob('```json\n{"title": "T", "body": "B"}\n```')
    assert parsed == expected
def test_extract_json_blob_handles_prose_preamble():
    """A JSON object surrounded by chatty prose is still extracted."""
    expected = {"title": "T", "body": "B"}
    parsed = spec._extract_json_blob(
        'Sure! Here you go:\n{"title": "T", "body": "B"}\nThanks.'
    )
    assert parsed == expected
def test_extract_json_blob_returns_none_for_unparseable():
    """Inputs without a valid JSON object yield ``None``.

    Covers plain prose, the empty string, and a brace-delimited blob that
    is not valid JSON.
    """
    unparseable_inputs = ("no json here", "", "{not: valid}")
    for raw in unparseable_inputs:
        assert spec._extract_json_blob(raw) is None
# ---------------------------------------------------------------------------
# specify_task (module-level entry point)
# ---------------------------------------------------------------------------
def test_specify_task_happy_path(kanban_home):
    """A well-formed JSON reply rewrites the task and moves it out of triage.

    The mocked LLM returns a title and body; the outcome must report
    success with the new title, and the stored task must carry both fields
    and be in ``ready``.
    """
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    llm_reply = jsonlib.dumps({
        "title": "Refined rough",
        "body": "**Goal**\nA concrete goal.",
    })
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        outcome = spec.specify_task(tid, author="ace")

    assert outcome.ok is True
    assert outcome.task_id == tid
    assert outcome.new_title == "Refined rough"

    with kb.connect() as conn:
        stored = kb.get_task(conn, tid)
    # The task has no parents, so it is expected to end up in ready.
    assert stored.status == "ready"
    assert stored.title == "Refined rough"
    assert "**Goal**" in (stored.body or "")
def test_specify_task_falls_back_to_body_only_on_bad_json(kanban_home):
    """A non-JSON reply is used as the body and the title is left alone.

    The mocked LLM answers with plain text; the call must still succeed,
    keep the original title, and store the raw text in the body.
    """
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="keep title", triage=True)

    # Plain text reply: there is no JSON object to parse.
    plain_reply = "Goal: Do a thing.\nApproach: Steps here."
    patcher, _client = _patch_aux_client(plain_reply)
    with patcher:
        outcome = spec.specify_task(tid)
    assert outcome.ok is True

    with kb.connect() as conn:
        stored = kb.get_task(conn, tid)
    assert stored.title == "keep title"
    assert "Goal:" in (stored.body or "")
def test_specify_task_rejects_non_triage_task(kanban_home):
    """Specifying a task that is not in triage fails without calling the LLM."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="ready task")

    patcher, client = _patch_aux_client("unused")
    with patcher:
        outcome = spec.specify_task(tid)

    assert outcome.ok is False
    assert "not in triage" in outcome.reason
    # The rejection must happen before any LLM request is made.
    llm_calls = client.chat.completions.create.call_count
    assert llm_calls == 0
def test_specify_task_unknown_id(kanban_home):
    """An unknown task id fails with an "unknown task" reason and no LLM call."""
    patcher, client = _patch_aux_client("unused")
    with patcher:
        outcome = spec.specify_task("t_nope")

    assert outcome.ok is False
    assert "unknown task" in outcome.reason
    llm_calls = client.chat.completions.create.call_count
    assert llm_calls == 0
def test_specify_task_no_aux_client_configured(kanban_home):
    """With no auxiliary client available the call fails and the task is untouched.

    The client factory is patched to return ``(None, "")``; the outcome
    must name the auxiliary client in its reason and the task must remain
    in ``triage``.
    """
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    no_client = patch(
        "agent.auxiliary_client.get_text_auxiliary_client",
        return_value=(None, ""),
    )
    with no_client:
        outcome = spec.specify_task(tid)

    assert outcome.ok is False
    assert "auxiliary client" in outcome.reason
    with kb.connect() as conn:
        status = kb.get_task(conn, tid).status
        assert status == "triage"
def test_specify_task_llm_api_error_keeps_task_in_triage(kanban_home):
    """An exception raised by the LLM call is reported and the task stays in triage.

    The mock client raises ``RuntimeError`` from ``create``; the outcome
    must be a failure whose reason contains "LLM error".
    """
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    failing_client = MagicMock()
    failing_client.chat.completions.create = MagicMock(
        side_effect=RuntimeError("429 rate limited")
    )
    failing_factory = patch(
        "agent.auxiliary_client.get_text_auxiliary_client",
        return_value=(failing_client, "test-model"),
    )
    with failing_factory:
        outcome = spec.specify_task(tid)

    assert outcome.ok is False
    assert "LLM error" in outcome.reason
    with kb.connect() as conn:
        status = kb.get_task(conn, tid).status
        assert status == "triage"
def test_specify_task_empty_llm_response(kanban_home):
    """An empty LLM reply counts as a failure and leaves the task in triage."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    patcher, _client = _patch_aux_client("")
    with patcher:
        outcome = spec.specify_task(tid)

    assert outcome.ok is False
    with kb.connect() as conn:
        status = kb.get_task(conn, tid).status
        assert status == "triage"
def test_list_triage_ids(kanban_home):
    """``list_triage_ids`` returns only triage tasks, optionally filtered by tenant.

    Two triage tasks (one with tenant ``proj-1``) and one non-triage task
    are created. The unfiltered call returns both triage ids; the tenant
    filter returns only the matching one.
    """
    with kb.connect() as conn:
        untenanted = kb.create_task(conn, title="a", triage=True)
        tenanted = kb.create_task(conn, title="b", triage=True, tenant="proj-1")
        kb.create_task(conn, title="c")  # not in triage, must be excluded

    every_id = spec.list_triage_ids()
    assert set(every_id) == {untenanted, tenanted}

    tenant_only = spec.list_triage_ids(tenant="proj-1")
    assert tenant_only == [tenanted]
# ---------------------------------------------------------------------------
# CLI wiring — argparse + _cmd_specify
# ---------------------------------------------------------------------------
def _run_cli(*argv: str) -> int:
    """Run ``kanban <argv...>`` through the argparse surface and return its exit code.

    A throwaway root parser is built, the kanban subparser is registered
    on it via ``kanban_cli.build_parser``, and the parsed namespace is
    handed to ``kanban_cli.kanban_command``.
    """
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(dest="cmd")
    kanban_cli.build_parser(subparsers)
    namespace = parser.parse_args(["kanban", *argv])
    return kanban_cli.kanban_command(namespace)
def test_cli_specify_requires_id_or_all(kanban_home, capsys):
    """``specify`` with neither a task id nor ``--all`` exits 2 with a usage error."""
    exit_code = _run_cli("specify")
    assert exit_code == 2
    captured = capsys.readouterr()
    assert "requires a task id or --all" in captured.err
def test_cli_specify_rejects_both_id_and_all(kanban_home, capsys):
    """Passing a task id together with ``--all`` exits 2 with an error on stderr."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    exit_code = _run_cli("specify", tid, "--all")
    assert exit_code == 2
    captured = capsys.readouterr()
    assert "either a task id OR --all" in captured.err
def test_cli_specify_single_id_success(kanban_home, capsys):
    """``specify <id>`` succeeds with exit code 0 and prints the task id."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    llm_reply = jsonlib.dumps({"title": "clean", "body": "body"})
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        exit_code = _run_cli("specify", tid)

    assert exit_code == 0
    out = capsys.readouterr().out
    assert tid in out
    # NOTE(review): the last alternative, `"" in out`, is always true, so
    # this assertion can never fail. It looks like a character was lost
    # from that literal — confirm the intended marker and restore it.
    assert "→ todo" in out or "-> todo" in out or "" in out
def test_cli_specify_all_success_and_json(kanban_home, capsys):
    """``specify --all --json`` prints one successful JSON object per triage task.

    Two triage tasks are created; stdout must contain exactly two
    non-empty lines, each a JSON object with ``ok`` truthy, and their
    ``task_id`` values must be the two created ids.
    """
    with kb.connect() as conn:
        first = kb.create_task(conn, title="a", triage=True)
        second = kb.create_task(conn, title="b", triage=True)

    llm_reply = jsonlib.dumps({"title": "spec", "body": "body"})
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        exit_code = _run_cli("specify", "--all", "--json")
    assert exit_code == 0

    stdout = capsys.readouterr().out
    json_lines = [line for line in stdout.strip().splitlines() if line]
    # Exactly one JSON line per task and no extra output.
    assert len(json_lines) == 2
    rows = [jsonlib.loads(line) for line in json_lines]
    reported_ids = {row["task_id"] for row in rows}
    assert reported_ids == {first, second}
    assert all(row["ok"] for row in rows)
def test_cli_specify_all_empty_triage_column(kanban_home, capsys):
    """``specify --all`` on an empty board exits 0 and reports no triage tasks."""
    exit_code = _run_cli("specify", "--all")
    assert exit_code == 0
    stdout = capsys.readouterr().out
    assert "No triage tasks" in stdout
def test_cli_specify_all_returns_1_when_every_task_fails(kanban_home, capsys):
    """``specify --all`` exits 1 when no task could be specified.

    The auxiliary client factory returns ``(None, "")``, so both triage
    tasks are expected to fail.
    """
    with kb.connect() as conn:
        for title in ("a", "b"):
            kb.create_task(conn, title=title, triage=True)

    no_client = patch(
        "agent.auxiliary_client.get_text_auxiliary_client",
        return_value=(None, ""),  # no client available for any task
    )
    with no_client:
        exit_code = _run_cli("specify", "--all")
    assert exit_code == 1
def test_cli_specify_tenant_filter(kanban_home, capsys):
    """``--tenant`` restricts ``specify --all`` to that tenant's triage tasks.

    Only the ``proj-a`` task may appear in the JSON output and leave
    triage; the task without a tenant must stay in ``triage``.
    """
    with kb.connect() as conn:
        outside = kb.create_task(conn, title="outside", triage=True)
        inside = kb.create_task(
            conn, title="inside", triage=True, tenant="proj-a",
        )

    llm_reply = jsonlib.dumps({"title": "spec", "body": "body"})
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        exit_code = _run_cli("specify", "--all", "--tenant", "proj-a", "--json")
    assert exit_code == 0

    stdout = capsys.readouterr().out
    rows = [
        jsonlib.loads(line)
        for line in stdout.strip().splitlines()
        if line
    ]
    reported_ids = {row["task_id"] for row in rows}
    assert reported_ids == {inside}

    with kb.connect() as conn:
        # The task outside the tenant was not touched.
        assert kb.get_task(conn, outside).status == "triage"
        # The task inside the tenant moved on; either landing status is accepted.
        assert kb.get_task(conn, inside).status in {"todo", "ready"}
def test_cli_specify_author_passed_through(kanban_home, capsys):
    """``--author`` on the CLI becomes the author of the first task comment."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="rough", triage=True)

    llm_reply = jsonlib.dumps({"title": "fresh title", "body": "fresh body"})
    patcher, _client = _patch_aux_client(llm_reply)
    with patcher:
        exit_code = _run_cli("specify", tid, "--author", "custom-agent")
    assert exit_code == 0

    with kb.connect() as conn:
        comments = kb.list_comments(conn, tid)
    assert comments and comments[0].author == "custom-agent"

View file

@ -0,0 +1,184 @@
"""Tests for kb.specify_triage_task — the DB-layer atomic promotion
from the triage column to todo. LLM-free by design."""
from __future__ import annotations
from pathlib import Path
import pytest
from hermes_cli import kanban_db as kb
@pytest.fixture
def kanban_home(tmp_path, monkeypatch):
    """Provide an isolated Hermes home with an initialised kanban DB.

    Creates ``<tmp_path>/.hermes``, points ``HERMES_HOME`` at it,
    redirects ``Path.home()`` to ``tmp_path``, runs ``kb.init_db()`` and
    returns the ``.hermes`` directory.
    """
    hermes_dir = tmp_path / ".hermes"
    hermes_dir.mkdir()
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_dir))
    kb.init_db()
    return hermes_dir
def _create_triage(conn, title="rough idea", body=None, assignee=None):
    """Create a task with ``triage=True`` on *conn*.

    Returns whatever ``kb.create_task`` returns; the tests in this file
    use that value as the task id.
    """
    task_fields = {"title": title, "body": body, "assignee": assignee}
    return kb.create_task(conn, triage=True, **task_fields)
def test_specify_promotes_triage_to_todo(kanban_home):
    """Specifying a parent-free triage task updates it and moves it to ``ready``.

    The call must return ``True`` and the stored task must carry the new
    title and body.
    """
    with kb.connect() as conn:
        tid = _create_triage(conn, title="rough idea")
        assert kb.get_task(conn, tid).status == "triage"

    refinement = {
        "title": "Refined: rough idea",
        "body": "**Goal**\nDo the thing.",
        "author": "specifier-bot",
    }
    with kb.connect() as conn:
        promoted = kb.specify_triage_task(conn, tid, **refinement)
    assert promoted is True

    with kb.connect() as conn:
        stored = kb.get_task(conn, tid)
    # With no parents the task is expected to go past todo, straight to ready.
    assert stored.status == "ready"
    assert stored.title == "Refined: rough idea"
    assert "**Goal**" in (stored.body or "")
def test_specify_with_open_parent_lands_in_todo_not_ready(kanban_home):
    """A specified task whose parent is still open lands in ``todo``.

    The child is linked to an unfinished parent while in triage. Linking
    must leave it in ``triage``, and specifying it must move it to
    ``todo`` rather than ``ready``.
    """
    with kb.connect() as conn:
        parent = kb.create_task(conn, title="parent work")
        child = _create_triage(conn, title="child idea")
        kb.link_tasks(conn, parent, child)
        # Linking alone must not move the child out of triage.
        assert kb.get_task(conn, child).status == "triage"

    with kb.connect() as conn:
        promoted = kb.specify_triage_task(
            conn, child, body="full spec", author="specifier",
        )
    assert promoted is True

    with kb.connect() as conn:
        stored_child = kb.get_task(conn, child)
    # The parent is still open, so the child must wait in todo.
    assert stored_child.status == "todo"
def test_specify_refuses_non_triage_task(kanban_home):
    """Specifying a task that is not in triage returns ``False`` and changes nothing."""
    with kb.connect() as conn:
        tid = kb.create_task(conn, title="normal task")
        assert kb.get_task(conn, tid).status == "ready"

    with kb.connect() as conn:
        promoted = kb.specify_triage_task(conn, tid, body="won't apply")
    assert promoted is False

    with kb.connect() as conn:
        status_after = kb.get_task(conn, tid).status
        # The refused call must leave the status as it was.
        assert status_after == "ready"
def test_specify_returns_false_for_unknown_id(kanban_home):
    """An id that matches no task makes ``specify_triage_task`` return ``False``."""
    with kb.connect() as conn:
        promoted = kb.specify_triage_task(conn, "t_does_not_exist", body="x")
    assert promoted is False
def test_specify_rejects_blank_title(kanban_home):
    """A whitespace-only title makes ``specify_triage_task`` raise ``ValueError``."""
    with kb.connect() as conn:
        tid = _create_triage(conn, title="rough")

    with kb.connect() as conn:
        with pytest.raises(ValueError):
            kb.specify_triage_task(conn, tid, title="   ", body="ok")
def test_specify_emits_event(kanban_home):
    """Specifying a task records a ``specified`` event listing the changed fields.

    The event's payload must be present and its ``changed_fields`` entry
    must include both ``title`` and ``body``.
    """
    with kb.connect() as conn:
        tid = _create_triage(conn, title="rough")

    with kb.connect() as conn:
        kb.specify_triage_task(
            conn, tid, title="new", body="b", author="ace"
        )

    with kb.connect() as conn:
        events = kb.list_events(conn, tid)
    specified_events = [e for e in events if e.kind == "specified"]
    assert specified_events
    # Inspect the first ``specified`` event's payload.
    payload = specified_events[0].payload
    assert payload is not None
    changed = payload.get("changed_fields") or []
    assert "title" in changed
    assert "body" in changed
def test_specify_records_audit_comment_only_when_author_given(kanban_home):
    """An audit comment is written only when ``author`` is supplied.

    With an author, one comment containing "Specified" and attributed to
    that author appears; without an author, no comment is added.
    """
    # Case 1: author supplied, exactly one attributed comment.
    with kb.connect() as conn:
        authored_tid = _create_triage(conn, title="a")
        kb.specify_triage_task(
            conn, authored_tid, title="A-spec", body="b", author="ace"
        )
        authored_comments = kb.list_comments(conn, authored_tid)
    assert len(authored_comments) == 1
    audit = authored_comments[0]
    assert "Specified" in audit.body
    assert audit.author == "ace"

    # Case 2: no author, no comment.
    with kb.connect() as conn:
        anonymous_tid = _create_triage(conn, title="b")
        kb.specify_triage_task(conn, anonymous_tid, title="B-spec", body="b")
        anonymous_comments = kb.list_comments(conn, anonymous_tid)
    assert anonymous_comments == []
def test_specify_skips_comment_when_nothing_changed(kanban_home):
    """Re-supplying identical title and body promotes the task without a comment.

    The call must still return ``True`` and leave triage, but since
    neither field differs, no audit comment may be written even though an
    author is given.
    """
    unchanged = {"title": "same", "body": "same body"}
    with kb.connect() as conn:
        tid = _create_triage(conn, **unchanged)

    with kb.connect() as conn:
        promoted = kb.specify_triage_task(conn, tid, author="ace", **unchanged)
    assert promoted is True

    with kb.connect() as conn:
        # The task left triage; either landing status is accepted.
        assert kb.get_task(conn, tid).status in {"todo", "ready"}
        # Nothing changed, so no audit trail entry.
        assert kb.list_comments(conn, tid) == []
def test_specify_with_only_body_preserves_title(kanban_home):
    """Supplying only ``body`` replaces the body and keeps the existing title."""
    with kb.connect() as conn:
        tid = _create_triage(conn, title="keep this title")

    with kb.connect() as conn:
        kb.specify_triage_task(conn, tid, body="new body only")

    with kb.connect() as conn:
        stored = kb.get_task(conn, tid)
    assert stored.title == "keep this title"
    assert stored.body == "new body only"
def test_specify_second_call_noop_false(kanban_home):
    """A second specify on the same task must not crash and returns False.

    After the first call the task is no longer in triage, so the repeat
    call has nothing to promote.
    """
    with kb.connect() as conn:
        tid = _create_triage(conn, title="once")

    with kb.connect() as conn:
        first_outcome = kb.specify_triage_task(conn, tid, body="spec")
        assert first_outcome is True

    with kb.connect() as conn:
        second_outcome = kb.specify_triage_task(conn, tid, body="spec again")
        assert second_outcome is False

View file

@ -0,0 +1,261 @@
"""Tests for ``list_picker_providers`` — the /model picker filter.
``list_picker_providers`` wraps ``list_authenticated_providers`` and
post-processes the result for interactive pickers (Telegram, Discord):
- OpenRouter's ``models`` are replaced with the live-filtered output of
``fetch_openrouter_models``, so IDs the live catalog no longer carries
drop out.
- Provider rows with an empty ``models`` list are dropped, except custom
endpoints (``is_user_defined=True`` with an ``api_url``) where the user
may supply their own model set through config.
These tests exercise the filter in isolation by mocking
``list_authenticated_providers`` and ``fetch_openrouter_models`` so no
network or auth state is required.
"""
import pytest
from hermes_cli import model_switch
def _make_provider(slug, name=None, models=None, *, is_current=False,
                   is_user_defined=False, source="built-in", api_url=None):
    """Build a dict shaped like ``list_authenticated_providers`` output.

    Args:
        slug: Provider slug; also the source of the default display name.
        name: Display name. Falls back to ``slug.title()`` when falsy.
        models: Iterable of model ids, or ``None`` for an empty list. It is
            materialized exactly once, so one-shot iterables (generators)
            work and ``total_models`` always matches ``models``.
        is_current: Value for the ``is_current`` key.
        is_user_defined: Value for the ``is_user_defined`` key.
        source: Value for the ``source`` key.
        api_url: When not ``None``, stored under ``api_url``; otherwise the
            key is omitted entirely.

    Returns:
        A fresh dict; ``models`` is always a new list, never the caller's.
    """
    # Materialize once: evaluating ``models or []`` separately for list()
    # and len() would break on iterables that can only be consumed once.
    model_ids = list(models or [])
    entry = {
        "slug": slug,
        "name": name or slug.title(),
        "is_current": is_current,
        "is_user_defined": is_user_defined,
        "models": model_ids,
        "total_models": len(model_ids),
        "source": source,
    }
    if api_url is not None:
        entry["api_url"] = api_url
    return entry
def test_openrouter_models_replaced_with_live_catalog(monkeypatch):
    """OpenRouter row's ``models`` should come from fetch_openrouter_models."""
    stale_rows = [
        _make_provider("openrouter", models=["openai/gpt-stale", "old/model"]),
    ]
    live_catalog = [
        ("openai/gpt-5.4", "recommended"),
        ("moonshotai/kimi-k2.6", ""),
    ]

    def _fake_base(**kw):
        return list(stale_rows)

    def _fake_fetch(*a, **kw):
        return list(live_catalog)

    monkeypatch.setattr(model_switch, "list_authenticated_providers", _fake_base)
    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models", _fake_fetch)

    rows = model_switch.list_picker_providers(max_models=50)

    assert len(rows) == 1
    row = rows[0]
    assert row["slug"] == "openrouter"
    # Only the ids (first tuple element) of the live catalog survive.
    assert row["models"] == ["openai/gpt-5.4", "moonshotai/kimi-k2.6"]
    assert row["total_models"] == 2
def test_openrouter_falls_back_to_base_models_on_fetch_failure(monkeypatch):
    """If the live catalog fetch raises, keep whatever base provided."""
    fallback_models = ["openai/gpt-5.4", "moonshotai/kimi-k2.6"]
    base_rows = [_make_provider("openrouter", models=fallback_models)]

    def _fake_base(**kw):
        return list(base_rows)

    def _network_down(*_a, **_kw):
        raise RuntimeError("network down")

    monkeypatch.setattr(model_switch, "list_authenticated_providers", _fake_base)
    monkeypatch.setattr(
        "hermes_cli.models.fetch_openrouter_models", _network_down
    )

    rows = model_switch.list_picker_providers(max_models=50)

    assert len(rows) == 1
    assert rows[0]["models"] == fallback_models
def test_openrouter_empty_live_catalog_drops_row(monkeypatch):
    """If the live catalog returns nothing for OpenRouter, drop the row."""
    base_rows = [_make_provider("openrouter", models=["something/stale"])]

    def _fake_base(**kw):
        return list(base_rows)

    def _empty_catalog(*a, **kw):
        return []

    monkeypatch.setattr(model_switch, "list_authenticated_providers", _fake_base)
    monkeypatch.setattr(
        "hermes_cli.models.fetch_openrouter_models", _empty_catalog
    )

    rows = model_switch.list_picker_providers(max_models=50)
    assert rows == []
def test_non_openrouter_rows_passed_through_unchanged(monkeypatch):
    """Non-OpenRouter providers keep their curated ``models`` as-is."""
    anthropic_models = ["claude-sonnet-4-6", "claude-opus-4-7"]
    gemini_models = ["gemini-3-flash-preview"]
    base_rows = [
        _make_provider("anthropic", models=anthropic_models),
        _make_provider("gemini", models=gemini_models),
    ]

    def _fake_base(**kw):
        return list(base_rows)

    def _must_not_fetch(*a, **kw):
        # With no openrouter row present the live catalog must never be
        # consulted, so reaching this stub fails the test.
        return pytest.fail("should not be called")

    monkeypatch.setattr(model_switch, "list_authenticated_providers", _fake_base)
    monkeypatch.setattr(
        "hermes_cli.models.fetch_openrouter_models", _must_not_fetch
    )

    rows = model_switch.list_picker_providers(max_models=50)

    assert [row["slug"] for row in rows] == ["anthropic", "gemini"]
    assert rows[0]["models"] == ["claude-sonnet-4-6", "claude-opus-4-7"]
    assert rows[1]["models"] == ["gemini-3-flash-preview"]
def test_empty_models_row_dropped(monkeypatch):
    """Built-in provider with an empty ``models`` list is dropped."""
    empty_builtin = _make_provider("anthropic", models=[])  # expected to drop
    openrouter_row = _make_provider("openrouter", models=["anything"])  # live-replaced
    base_rows = [empty_builtin, openrouter_row]

    def _fake_base(**kw):
        return list(base_rows)

    def _fake_fetch(*a, **kw):
        return [("openai/gpt-5.4", "recommended")]

    monkeypatch.setattr(model_switch, "list_authenticated_providers", _fake_base)
    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models", _fake_fetch)

    rows = model_switch.list_picker_providers(max_models=50)
    surviving_slugs = [row["slug"] for row in rows]
    assert surviving_slugs == ["openrouter"]
def test_custom_endpoint_with_api_url_kept_when_models_empty(monkeypatch):
    """User-defined endpoints with an ``api_url`` survive even if models empty.

    Rationale: custom endpoints may accept any model id the user types --
    the picker still shows the row so the user can enter one manually.
    """
    ollama_row = _make_provider(
        "local-ollama",
        models=[],
        is_user_defined=True,
        source="user-config",
        api_url="http://localhost:11434/v1",
    )
    base_rows = [ollama_row]

    def _fake_base(**kw):
        return list(base_rows)

    def _empty_catalog(*a, **kw):
        return []

    monkeypatch.setattr(model_switch, "list_authenticated_providers", _fake_base)
    monkeypatch.setattr(
        "hermes_cli.models.fetch_openrouter_models", _empty_catalog
    )

    rows = model_switch.list_picker_providers(max_models=50)

    assert len(rows) == 1
    kept = rows[0]
    assert kept["slug"] == "local-ollama"
    assert kept["models"] == []
def test_user_defined_without_api_url_and_empty_models_dropped(monkeypatch):
    """An is_user_defined row WITHOUT api_url and no models is still dropped.

    The exemption is specifically for custom endpoints that can accept
    arbitrary model ids; without an api_url there's nothing to point at.
    """
    orphan_row = _make_provider(
        "orphan", models=[], is_user_defined=True, api_url=None
    )
    base_rows = [orphan_row]

    def _fake_base(**kw):
        return list(base_rows)

    def _empty_catalog(*a, **kw):
        return []

    monkeypatch.setattr(model_switch, "list_authenticated_providers", _fake_base)
    monkeypatch.setattr(
        "hermes_cli.models.fetch_openrouter_models", _empty_catalog
    )

    rows = model_switch.list_picker_providers(max_models=50)
    assert rows == []
def test_max_models_caps_openrouter_live_output(monkeypatch):
    """``max_models`` caps how many OpenRouter IDs land in the row."""
    catalog_size = 20
    cap = 5
    live_catalog = [(f"vendor/model-{i}", "") for i in range(catalog_size)]
    base_rows = [_make_provider("openrouter", models=["placeholder"])]

    def _fake_base(**kw):
        return list(base_rows)

    def _fake_fetch(*a, **kw):
        return list(live_catalog)

    monkeypatch.setattr(model_switch, "list_authenticated_providers", _fake_base)
    monkeypatch.setattr("hermes_cli.models.fetch_openrouter_models", _fake_fetch)

    rows = model_switch.list_picker_providers(max_models=cap)

    assert len(rows) == 1
    row = rows[0]
    assert len(row["models"]) == 5
    # The cap keeps the leading slice of the live catalog, in order.
    assert row["models"] == [model_id for model_id, _ in live_catalog[:5]]
    # total_models reflects the full live catalog, not the capped slice.
    assert row["total_models"] == 20
def test_passthrough_kwargs_to_base(monkeypatch):
    """All kwargs must be forwarded to ``list_authenticated_providers`` unchanged.

    The gateway /model picker passes ``current_base_url`` and ``current_model``
    so custom endpoint grouping can mark the current row. Dropping those kwargs
    regressed Telegram/Discord into the text-list fallback.
    """
    forwarded = {
        "current_provider": "openrouter",
        "current_base_url": "http://x",
        "current_model": "openai/gpt-5.4",
        "user_providers": {"foo": {"api": "http://x"}},
        "custom_providers": [{"name": "bar", "base_url": "http://y"}],
        "max_models": 12,
    }
    seen = {}

    def _record_kwargs(**kwargs):
        seen.update(kwargs)
        return []

    def _empty_catalog(*a, **kw):
        return []

    monkeypatch.setattr(
        model_switch, "list_authenticated_providers", _record_kwargs
    )
    monkeypatch.setattr(
        "hermes_cli.models.fetch_openrouter_models", _empty_catalog
    )

    model_switch.list_picker_providers(**forwarded)

    # Check key by key (not whole-dict equality) so extra kwargs the wrapper
    # may add do not affect the outcome.
    assert seen["current_provider"] == "openrouter"
    assert seen["current_base_url"] == "http://x"
    assert seen["current_model"] == "openai/gpt-5.4"
    assert seen["user_providers"] == {"foo": {"api": "http://x"}}
    assert seen["custom_providers"] == [{"name": "bar", "base_url": "http://y"}]
    assert seen["max_models"] == 12
def test_current_custom_endpoint_passthrough_marks_current_row(monkeypatch):
    """Interactive picker should preserve current custom endpoint semantics."""
    # Stub the catalog sources patched by the original test so only the
    # custom_providers passed below contribute rows.
    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
    monkeypatch.setattr("agent.models_dev.PROVIDER_TO_MODELS_DEV", {})
    monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {})
    monkeypatch.setattr(
        "hermes_cli.models.fetch_openrouter_models", lambda *a, **kw: []
    )

    ollama_url = "http://localhost:11434/v1"
    glm_entry = {
        "name": "Ollama — GLM 5.1",
        "base_url": ollama_url,
        "api_key": "ollama",
        "model": "glm-5.1",
    }
    qwen_entry = {
        "name": "Ollama — Qwen3",
        "base_url": ollama_url,
        "api_key": "ollama",
        "model": "qwen3",
    }

    rows = model_switch.list_picker_providers(
        current_provider="custom:ollama",
        current_base_url=ollama_url,
        current_model="glm-5.1",
        user_providers={},
        custom_providers=[glm_entry, qwen_entry],
        max_models=50,
    )

    # Both entries share one base_url and are expected as a single row.
    user_defined_rows = [row for row in rows if row.get("is_user_defined")]
    assert len(user_defined_rows) == 1
    grouped = user_defined_rows[0]
    assert grouped["slug"] == "custom:ollama"
    assert grouped["is_current"] is True
    assert grouped["models"] == ["glm-5.1", "qwen3"]

View file

@ -0,0 +1,87 @@
"""Regression test: ``hermes mcp add --command`` must not clobber the
top-level ``args.command`` subparser dest.
The top-level argparse parser uses ``dest="command"`` for its subparsers
(``hermes_cli/_parser.py``). The dispatcher in ``hermes_cli/main.py``
reads ``args.command`` to decide which command to run; if it is ``None``
it falls through to interactive chat.
The ``mcp add`` subparser exposes a ``--command`` flag (the stdio command
for an MCP server, e.g. ``npx``). Without an explicit ``dest=``, argparse
derives the dest from the flag name and writes ``args.command = None``
when the flag is omitted, overwriting the top-level ``"mcp"`` value. As a
result, ``hermes mcp add foo --url ...`` silently launches chat instead
of registering an MCP server.
The fix: declare the flag with ``dest="mcp_command"``. The CLI flag name
is unchanged; only the in-memory attribute moves.
We replicate the relevant parser shape here rather than importing the
real builder, mirroring ``test_argparse_flag_propagation.py`` and
``test_subparser_routing_fallback.py``.
"""
import argparse
def _build_parser():
    """Return a minimal replica of the hermes parser slice under test.

    It contains the top-level subparsers (``dest="command"``) with ``chat``
    and ``mcp``, and under ``mcp`` an ``add`` action whose ``--command``
    flag is stored on ``mcp_command``.
    """
    root = argparse.ArgumentParser(prog="hermes")
    top_level = root.add_subparsers(dest="command")
    top_level.add_parser("chat")

    # hermes mcp <action>
    mcp_actions = top_level.add_parser("mcp").add_subparsers(dest="mcp_action")

    # hermes mcp add <name> [--url URL] [--command CMD]
    add_parser = mcp_actions.add_parser("add")
    add_parser.add_argument("name")
    add_parser.add_argument("--url")
    # Explicit dest keeps this flag from overwriting the top-level ``command``.
    add_parser.add_argument("--command", dest="mcp_command")
    return root
class TestMcpAddCommandDest:
    """Parser-level checks that ``--command`` is stored on ``mcp_command``."""

    def test_url_invocation_preserves_top_level_command(self):
        """`hermes mcp add foo --url ...` must keep args.command == "mcp".

        Before the dest fix this was clobbered to None, sending the
        dispatcher into the chat fallback.
        """
        argv = ["mcp", "add", "foo", "--url", "https://example.com/mcp"]
        parsed = _build_parser().parse_args(argv)

        assert parsed.command == "mcp"
        assert parsed.mcp_action == "add"
        assert parsed.name == "foo"
        assert parsed.url == "https://example.com/mcp"
        assert parsed.mcp_command is None

    def test_command_flag_writes_to_mcp_command_dest(self):
        """`--command npx` must populate args.mcp_command, not args.command."""
        argv = ["mcp", "add", "github", "--command", "npx"]
        parsed = _build_parser().parse_args(argv)

        assert parsed.command == "mcp"
        assert parsed.mcp_command == "npx"

    def test_bare_mcp_add_does_not_clobber_command(self):
        """Even without --url or --command, args.command stays "mcp".

        Catches the regression at the parser layer regardless of which
        transport flag the user passes.
        """
        parsed = _build_parser().parse_args(["mcp", "add", "foo"])

        assert parsed.command == "mcp"
        assert parsed.mcp_command is None
        assert parsed.url is None

View file

@ -43,7 +43,7 @@ def _make_args(**kwargs):
defaults = {
"name": "test-server",
"url": None,
"command": None,
"mcp_command": None,
"args": None,
"auth": None,
"preset": None,
@ -233,7 +233,7 @@ class TestMcpAdd:
cmd_mcp_add(_make_args(
name="github",
command="npx",
mcp_command="npx",
args=["@mcp/github"],
))
out = capsys.readouterr().out
@ -291,7 +291,7 @@ class TestMcpAdd:
cmd_mcp_add(_make_args(
name="github",
command="npx",
mcp_command="npx",
args=["@mcp/github"],
env=["MY_API_KEY=secret123", "DEBUG=true"],
))
@ -313,7 +313,7 @@ class TestMcpAdd:
cmd_mcp_add(_make_args(
name="github",
command="npx",
mcp_command="npx",
args=["@mcp/github"],
env=["BAD-NAME=value"],
))
@ -390,7 +390,7 @@ class TestMcpAdd:
cmd_mcp_add(_make_args(
name="custom",
preset="testmcp",
command="uvx",
mcp_command="uvx",
args=["custom-server"],
))
out = capsys.readouterr().out

View file

@ -3,6 +3,7 @@
from __future__ import annotations
import json
import os
import time
from pathlib import Path
from unittest.mock import patch
@ -282,3 +283,48 @@ class TestIntegrationWithModelsModule:
result = get_curated_nous_model_ids()
assert result == ["anthropic/claude-opus-4.7", "moonshotai/kimi-k2.6"]
def test_picker_nous_row_uses_manifest(self, tmp_path, monkeypatch):
    """The /model picker must surface the manifest's nous list, not the
    in-repo _PROVIDER_MODELS["nous"] snapshot.  Regression: before this
    fix, list_authenticated_providers() built the curated dict from
    _PROVIDER_MODELS only so newly-added Portal models never reached
    the slash-command picker until the next Hermes release.
    """
    # We deliberately do NOT use the ``isolated_home`` fixture here:
    # that fixture monkeypatches ``Path.home`` to ``tmp_path``, which
    # trips the auth-store seat-belt in ``_auth_file_path()`` because
    # ``HERMES_HOME / auth.json`` then resolves to the same path the
    # seat-belt thinks is the "real" user store.  Use the autouse
    # ``_hermetic_environment`` HERMES_HOME directly instead.
    import importlib

    from hermes_cli import model_catalog

    importlib.reload(model_catalog)

    # Minimal auth store content carrying a nous access token.
    auth_payload = {
        "providers": {"nous": {"access_token": "fake"}},
        "credential_pool": {},
    }

    try:
        from hermes_cli.model_switch import list_picker_providers

        auth_file = Path(os.environ["HERMES_HOME"]) / "auth.json"
        auth_file.write_text(json.dumps(auth_payload))

        manifest_patch = patch.object(
            model_catalog, "_fetch_manifest", return_value=_valid_manifest()
        )
        with manifest_patch:
            picker = list_picker_providers(
                current_provider="nous", max_models=99
            )
    finally:
        # Always reset the catalog cache, pass or fail.
        model_catalog.reset_cache()

    nous_row = next((row for row in picker if row["slug"] == "nous"), None)
    assert nous_row is not None, "nous row must appear when authed"
    assert nous_row["models"] == [
        "anthropic/claude-opus-4.7",
        "moonshotai/kimi-k2.6",
    ]

View file

@ -71,6 +71,32 @@ class TestSaveModelChoiceAlwaysDict:
class TestProviderPersistsAfterModelSave:
def test_update_config_for_provider_uses_atomic_yaml_write(self, config_home):
    """Provider switches should delegate config writes to atomic_yaml_write."""
    from hermes_cli.auth import _update_config_for_provider

    config_path = config_home / "config.yaml"
    text_before = config_path.read_text(encoding="utf-8")

    def _failing_write(path, data, **kwargs):
        # Inspect what the caller hands to the writer, then simulate a
        # failed write.
        assert path == config_path
        model_section = data["model"]
        assert model_section["provider"] == "nous"
        assert model_section["base_url"] == "https://inference.example.com/v1"
        assert model_section["default"] == "some-old-model"
        assert kwargs["sort_keys"] is False
        raise OSError("simulated atomic write failure")

    writer_patch = patch(
        "hermes_cli.auth.atomic_yaml_write", side_effect=_failing_write
    )
    with writer_patch as mock_write:
        with pytest.raises(OSError, match="simulated atomic write failure"):
            _update_config_for_provider(
                "nous",
                "https://inference.example.com/v1/",
                default_model="llama-3.3",
            )

        assert mock_write.call_count == 1
        # The failed write must leave the file on disk unchanged.
        assert config_path.read_text(encoding="utf-8") == text_before
def test_api_key_provider_saved_when_model_was_string(self, config_home, monkeypatch):
"""_model_flow_api_key_provider must persist the provider even when
config.model started as a plain string."""
@ -260,32 +286,6 @@ class TestProviderPersistsAfterModelSave:
assert model.get("default") == "minimax-m2.5"
assert model.get("api_mode") == "anthropic_messages"
def test_lmstudio_provider_saved_when_selected(self, config_home, monkeypatch):
    """Selecting lmstudio persists provider, base_url and default model."""
    from hermes_cli.config import load_config
    from hermes_cli.main import _model_flow_api_key_provider

    def _pick_model(models, current_model=""):
        return "publisher/model-a"

    def _noop_deactivate():
        return None

    def _fake_lmstudio_models(api_key=None, base_url=None, timeout=5.0):
        return ["publisher/model-a"]

    monkeypatch.setenv("LM_API_KEY", "lm-token")
    monkeypatch.setattr("hermes_cli.auth._prompt_model_selection", _pick_model)
    monkeypatch.setattr("hermes_cli.auth.deactivate_provider", _noop_deactivate)
    monkeypatch.setattr(
        "hermes_cli.models.fetch_lmstudio_models", _fake_lmstudio_models
    )

    # One empty answer is queued for any interactive prompt.
    with patch("builtins.input", side_effect=[""]):
        _model_flow_api_key_provider(load_config(), "lmstudio", "old-model")

    import yaml

    saved_text = (config_home / "config.yaml").read_text()
    saved_config = yaml.safe_load(saved_text) or {}
    model_section = saved_config.get("model")
    assert isinstance(model_section, dict)
    assert model_section.get("provider") == "lmstudio"
    assert model_section.get("base_url") == "http://127.0.0.1:1234/v1"
    assert model_section.get("default") == "publisher/model-a"
class TestBaseUrlValidation:
@ -360,32 +360,3 @@ class TestBaseUrlValidation:
saved = get_env_value("GLM_BASE_URL") or ""
assert saved == "", "Empty input should not save a base URL"
def test_stepfun_provider_saved_with_selected_region(self, config_home, monkeypatch):
    """The StepFun flow persists provider, model and base URL."""
    from hermes_cli.main import _model_flow_stepfun
    from hermes_cli.config import load_config, get_env_value

    expected_base_url = "https://api.stepfun.com/step_plan/v1"
    monkeypatch.setenv("STEPFUN_API_KEY", "stepfun-test-key")

    # Nested ``with`` blocks enter the four patches in the same order as a
    # single comma-separated ``with`` statement would.
    with patch("hermes_cli.main._prompt_provider_choice", return_value=1):
        with patch(
            "hermes_cli.models.fetch_api_models",
            return_value=["step-3.5-flash", "step-3-agent-lite"],
        ):
            with patch(
                "hermes_cli.auth._prompt_model_selection",
                return_value="step-3-agent-lite",
            ):
                with patch("hermes_cli.auth.deactivate_provider"):
                    _model_flow_stepfun(load_config(), "old-model")

    import yaml

    saved_text = (config_home / "config.yaml").read_text()
    saved_config = yaml.safe_load(saved_text) or {}
    model_section = saved_config.get("model")
    assert isinstance(model_section, dict)
    assert model_section.get("provider") == "stepfun"
    assert model_section.get("default") == "step-3-agent-lite"
    assert model_section.get("base_url") == expected_base_url
    assert get_env_value("STEPFUN_BASE_URL") == expected_base_url

View file

@ -506,3 +506,64 @@ def test_lmstudio_picker_skips_probe_when_not_configured(monkeypatch):
)
assert "base_url" not in captured
def test_custom_providers_uses_live_models_for_multi_model_endpoint(monkeypatch):
    """Custom providers with api_key + base_url should prefer live /models.

    Custom providers (section 4 of list_authenticated_providers) point at
    gateways like Bifrost that expose hundreds of models. Reading only the
    static ``models:`` dict from config.yaml leaves the /model picker with
    a stale subset. Live discovery fills the picker with all available
    models from the endpoint.
    """
    gateway_url = "https://gateway.example.com/v1"
    gateway_key = "sk-gateway-key"
    live_ids = ["gateway-model-a", "gateway-model-b", "gateway-model-c"]
    fetch_calls = []

    def fake_fetch_api_models(api_key, base_url):
        fetch_calls.append((api_key, base_url))
        return list(live_ids)

    monkeypatch.setattr("agent.models_dev.fetch_models_dev", lambda: {})
    monkeypatch.setattr("hermes_cli.providers.HERMES_OVERLAYS", {})
    monkeypatch.setattr(
        "hermes_cli.models.fetch_api_models", fake_fetch_api_models
    )

    # The static config lists only two of the three models the gateway serves.
    gateway_entry = {
        "name": "my-gateway",
        "api_key": gateway_key,
        "base_url": gateway_url,
        "model": "gateway-model-a",
        "models": {
            "gateway-model-a": {"context_length": 128000},
            "gateway-model-b": {"context_length": 128000},
        },
    }

    providers = list_authenticated_providers(
        current_provider="openrouter",
        current_base_url="https://openrouter.ai/api/v1",
        custom_providers=[gateway_entry],
        max_models=50,
    )

    gateway_prov = next(
        (row for row in providers if row.get("api_url") == gateway_url),
        None,
    )
    assert gateway_prov is not None, "Custom provider group not found in results"
    assert fetch_calls == [("sk-gateway-key", "https://gateway.example.com/v1")], (
        "fetch_api_models must be called with the custom provider's credentials"
    )
    assert gateway_prov["models"] == [
        "gateway-model-a",
        "gateway-model-b",
        "gateway-model-c",
    ], "Live models must replace the static subset"
    assert gateway_prov["total_models"] == 3

View file

@ -770,15 +770,6 @@ class TestValidateCodexAutoCorrection:
assert result.get("corrected_model") is None
assert result["message"] is None
def test_very_different_name_falls_to_suggestions(self):
    """Names too different for auto-correction are rejected with a suggestion list."""
    listed_models = ["gpt-5.4-mini", "gpt-5.4", "gpt-5.3-codex"]
    listing_patch = patch(
        "hermes_cli.models.provider_model_ids", return_value=listed_models
    )
    with listing_patch:
        outcome = validate_requested_model("totally-wrong", "openai-codex")

    assert outcome["accepted"] is False
    assert outcome["recognized"] is False
    assert outcome.get("corrected_model") is None
    assert "not found" in outcome["message"]
# -- probe_api_models — Cloudflare UA mitigation --------------------------------

View file

@ -401,6 +401,103 @@ class TestOllamaCloudProvidersNew:
assert pdef.transport == "openai_chat"
# ── Cloud Suffix Stripping ──
class TestOllamaCloudSuffixStripping:
    """models.dev appends :cloud / -cloud suffixes that the live API omits.

    fetch_ollama_cloud_models() must normalise these before the dedup merge so
    users never see broken IDs like 'kimi-k2.6:cloud' in the model picker.
    """

    @staticmethod
    def _mdev_payload(*model_ids):
        """Build a models.dev-shaped dict listing ``model_ids`` under ollama-cloud."""
        return {
            "ollama-cloud": {
                "models": {mid: {"tool_call": True} for mid in model_ids}
            }
        }

    def test_strips_colon_cloud_suffix(self, tmp_path, monkeypatch):
        """:cloud suffix from models.dev is stripped before merge."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)

        payload = self._mdev_payload("kimi-k2.6:cloud")
        with patch("agent.models_dev.fetch_models_dev", return_value=payload):
            model_ids = fetch_ollama_cloud_models(force_refresh=True)

        assert "kimi-k2.6" in model_ids
        assert "kimi-k2.6:cloud" not in model_ids

    def test_strips_dash_cloud_suffix(self, tmp_path, monkeypatch):
        """-cloud suffix from models.dev is stripped before merge."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)

        payload = self._mdev_payload("qwen3-coder:480b-cloud")
        with patch("agent.models_dev.fetch_models_dev", return_value=payload):
            model_ids = fetch_ollama_cloud_models(force_refresh=True)

        assert "qwen3-coder:480b" in model_ids
        assert "qwen3-coder:480b-cloud" not in model_ids

    def test_no_duplicate_when_live_clean_and_mdev_suffixed(self, tmp_path, monkeypatch):
        """Live API returns clean ID; mdev has :cloud variant — result has exactly one entry."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setenv("OLLAMA_API_KEY", "test-key")

        payload = self._mdev_payload("kimi-k2.6:cloud", "glm-5.1:cloud")
        live_ids = ["kimi-k2.6", "glm-5.1"]
        with patch("hermes_cli.models.fetch_api_models", return_value=live_ids):
            with patch("agent.models_dev.fetch_models_dev", return_value=payload):
                model_ids = fetch_ollama_cloud_models(force_refresh=True)

        assert model_ids.count("kimi-k2.6") == 1
        assert model_ids.count("glm-5.1") == 1
        assert "kimi-k2.6:cloud" not in model_ids
        assert "glm-5.1:cloud" not in model_ids

    def test_unsuffixed_model_id_unchanged(self, tmp_path, monkeypatch):
        """Model IDs without :cloud / -cloud suffix are passed through unchanged."""
        from hermes_cli.models import fetch_ollama_cloud_models

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.delenv("OLLAMA_API_KEY", raising=False)

        payload = self._mdev_payload("nemotron-3-nano:30b")
        with patch("agent.models_dev.fetch_models_dev", return_value=payload):
            model_ids = fetch_ollama_cloud_models(force_refresh=True)

        assert "nemotron-3-nano:30b" in model_ids

    def test_strip_suffix_helper(self):
        """Unit test for the _strip_ollama_cloud_suffix helper."""
        from hermes_cli.models import _strip_ollama_cloud_suffix

        # (input, expected) pairs, checked in order.
        cases = [
            ("kimi-k2.6:cloud", "kimi-k2.6"),
            ("glm-5.1:cloud", "glm-5.1"),
            ("qwen3-coder:480b-cloud", "qwen3-coder:480b"),
            ("nemotron-3-nano:30b", "nemotron-3-nano:30b"),
            ("", ""),
        ]
        for raw_id, expected in cases:
            assert _strip_ollama_cloud_suffix(raw_id) == expected
# ── Auxiliary Model ──
class TestOllamaCloudAuxiliary:

View file

@ -0,0 +1,64 @@
"""Regression tests for OpenAI Codex model validation when the listing lags behind
actually usable backend model IDs.
The bug originally reported in #16172: `/model` and `switch_model()` rejected
`gpt-5.3-codex-spark` because the curated listing omitted it, even though direct
runtime calls succeeded. PR #19729 fixed this by soft-accepting unknown-but-
plausible Codex slugs with a warning, and this test pins the soft-accept
behavior so it doesn't regress.
Note: gpt-5.3-codex-spark itself is now in the curated catalog (PR #22991),
so the real-world Spark request takes the `recognized=True` fast path. This
test still uses Spark as the example slug but explicitly mocks
``provider_model_ids`` to omit it, exercising the soft-accept path generically
for any future entitlement-gated Codex slug that ships before Hermes catalogs
it.
"""
from unittest.mock import patch
from hermes_cli.model_switch import switch_model
from hermes_cli.models import validate_requested_model
def test_openai_codex_unknown_but_plausible_model_is_accepted_with_warning():
    """If the Codex listing is incomplete, `/model` should soft-accept the model
    with a warning instead of hard-rejecting it.
    """
    # The mocked listing deliberately omits the requested slug.
    listed_models = ["gpt-5.5", "gpt-5.4", "gpt-5.3-codex"]
    listing_patch = patch(
        "hermes_cli.models.provider_model_ids",
        return_value=listed_models,
    )
    with listing_patch:
        outcome = validate_requested_model("gpt-5.3-codex-spark", "openai-codex")

    assert outcome["accepted"] is True
    assert outcome["persist"] is True
    assert outcome["recognized"] is False

    # The warning names the slug, the listing, and similar models.
    assert "gpt-5.3-codex-spark" in outcome["message"]
    assert "OpenAI Codex model listing" in outcome["message"]
    assert "Similar models" in outcome["message"]
    assert "gpt-5.3-codex" in outcome["message"]
def test_switch_model_allows_openai_codex_model_missing_from_listing():
    """switch_model() should succeed for Codex models that the runtime accepts
    even when the listing has not caught up yet.
    """
    listed_models = ["gpt-5.5", "gpt-5.4", "gpt-5.3-codex"]
    switch_kwargs = {
        "current_provider": "openai-codex",
        "current_model": "gpt-5.4",
        "current_base_url": "",
        "current_api_key": "",
        "user_providers": None,
    }

    with patch(
        "hermes_cli.models.provider_model_ids", return_value=listed_models
    ):
        outcome = switch_model("gpt-5.3-codex-spark", **switch_kwargs)

    assert outcome.success is True
    assert outcome.new_model == "gpt-5.3-codex-spark"
    assert outcome.target_provider == "openai-codex"
    # The switch succeeds but still carries the listing warning.
    assert outcome.warning_message
    assert "OpenAI Codex model listing" in outcome.warning_message

View file

@ -0,0 +1,159 @@
"""Tests for opencode-go / opencode-zen flat-namespace model handling.
OpenCode Go is NOT a vendor/model aggregator like OpenRouter — its
``/v1/models`` endpoint returns bare IDs (``minimax-m2.7``, ``deepseek-v4-flash``)
and the inference API rejects vendor-prefixed names with HTTP 401
"Model not supported".
Two bugs this exercises:
1. ``switch_model('deepseek-v4-flash', current_provider='opencode-go')`` used
to silently switch the user off opencode-go to native ``deepseek`` because
``detect_provider_for_model`` matched the bare name against the static
deepseek catalog. Fix: once step d matches the model in the current
aggregator's live catalog, skip ``detect_provider_for_model``.
2. ``normalize_model_for_provider('minimax/minimax-m2.7', 'opencode-go')``
used to pass the ``minimax/`` prefix through unchanged. When user configs
contained prefixed fallback entries (commonly copied from aggregator slugs),
the fallback activation path sent ``minimax/minimax-m2.7`` to opencode-go
which returned HTTP 401. Fix: opencode-go/opencode-zen strip ANY leading
``vendor/`` prefix because their APIs are flat-namespace.
"""
from unittest.mock import patch
from hermes_cli.model_normalize import normalize_model_for_provider
from hermes_cli.model_switch import switch_model
# Live catalog opencode-go currently returns from /v1/models (snapshot).
# Every entry is a bare ID with no ``vendor/`` prefix; ``_run_switch`` below
# serves copies of this list as the mocked opencode-go catalog.
_OPENCODE_GO_LIVE = [
"minimax-m2.7", "minimax-m2.5",
"kimi-k2.6", "kimi-k2.5",
"glm-5.1", "glm-5",
"deepseek-v4-pro", "deepseek-v4-flash",
"qwen3.6-plus", "qwen3.5-plus",
"mimo-v2-pro", "mimo-v2-omni", "mimo-v2.5-pro", "mimo-v2.5",
]
# ---------------------------------------------------------------------------
# normalize_model_for_provider: strip vendor prefix for flat-namespace providers
# ---------------------------------------------------------------------------
def test_opencode_go_strips_deepseek_prefix():
    """A ``deepseek/`` prefix is removed for opencode-go."""
    normalized = normalize_model_for_provider(
        "deepseek/deepseek-v4-flash", "opencode-go"
    )
    assert normalized == "deepseek-v4-flash"
def test_opencode_go_strips_minimax_prefix():
    """A ``minimax/`` prefix is removed for opencode-go."""
    normalized = normalize_model_for_provider(
        "minimax/minimax-m2.7", "opencode-go"
    )
    assert normalized == "minimax-m2.7"
def test_opencode_go_strips_moonshotai_prefix():
    """A ``moonshotai/`` prefix is removed for opencode-go."""
    # Moonshot's aggregator vendor is `moonshotai/...` — a common copy-paste
    # from OpenRouter slugs. opencode-go serves it bare as `kimi-k2.6`.
    normalized = normalize_model_for_provider(
        "moonshotai/kimi-k2.6", "opencode-go"
    )
    assert normalized == "kimi-k2.6"
def test_opencode_go_bare_name_unchanged():
    """An already-bare ID passes through opencode-go normalization as-is."""
    bare_id = "kimi-k2.6"
    normalized = normalize_model_for_provider(bare_id, "opencode-go")
    assert normalized == "kimi-k2.6"
def test_opencode_go_preserves_dot_versioning():
    """Stripping the vendor prefix must not rewrite dots in the version."""
    # opencode-go uses dot-versioned IDs (`mimo-v2.5-pro`, not hyphen).
    normalized = normalize_model_for_provider(
        "xiaomi/mimo-v2.5-pro", "opencode-go"
    )
    assert normalized == "mimo-v2.5-pro"
def test_opencode_zen_still_hyphenates_claude():
    """Prefixed Claude IDs lose the prefix and get a hyphenated version."""
    # Regression: opencode-zen's Claude hyphen conversion must still work.
    normalized = normalize_model_for_provider(
        "anthropic/claude-sonnet-4.6", "opencode-zen"
    )
    assert normalized == "claude-sonnet-4-6"
def test_opencode_zen_bare_claude_hyphenated():
    """Bare Claude IDs also get the hyphenated version on opencode-zen."""
    normalized = normalize_model_for_provider(
        "claude-sonnet-4.6", "opencode-zen"
    )
    assert normalized == "claude-sonnet-4-6"
def test_opencode_zen_strips_arbitrary_vendor_prefix():
    """opencode-zen drops a non-Claude vendor prefix and keeps the dots."""
    normalized = normalize_model_for_provider(
        "minimax/minimax-m2.5-free", "opencode-zen"
    )
    assert normalized == "minimax-m2.5-free"
def test_openrouter_still_prepends_vendor():
    """OpenRouter normalization adds the vendor to a bare Claude ID."""
    # Regression: real aggregators must still get vendor/model format.
    normalized = normalize_model_for_provider(
        "claude-sonnet-4.6", "openrouter"
    )
    assert normalized == "anthropic/claude-sonnet-4.6"
# ---------------------------------------------------------------------------
# switch_model: live-catalog match on opencode-go must not trigger
# cross-provider auto-switch via detect_provider_for_model
# ---------------------------------------------------------------------------
def _run_switch(raw_input: str, **extra):
    """Run ``switch_model`` as if the session were currently on opencode-go.

    ``list_provider_models`` is patched so the opencode-go catalog comes from
    ``_OPENCODE_GO_LIVE`` and every other provider reports no models, keeping
    the test off the network. Keyword arguments in ``extra`` override the
    default session state.
    """
    call_kwargs = {
        "current_provider": "opencode-go",
        "current_model": "kimi-k2.6",
        "current_base_url": "https://opencode.ai/zen/go/v1",
        "current_api_key": "sk-test-opencode-go",
        "is_global": False,
        **extra,
    }

    def fake_list_provider_models(provider: str):
        # Only opencode-go has a catalog; other providers return empty so
        # tests don't depend on them.
        return list(_OPENCODE_GO_LIVE) if provider == "opencode-go" else []

    catalog_patch = patch(
        "hermes_cli.model_switch.list_provider_models",
        side_effect=fake_list_provider_models,
    )
    with catalog_patch:
        return switch_model(raw_input=raw_input, **call_kwargs)
def test_deepseek_v4_flash_stays_on_opencode_go():
    """Regression: ``/model deepseek-v4-flash`` while on opencode-go must
    NOT switch to native deepseek just because deepseek's static catalog
    also contains that name."""
    outcome = _run_switch("deepseek-v4-flash")
    failure_hint = (
        f"Expected to stay on opencode-go, got {outcome.target_provider}. "
        f"detect_provider_for_model hijacked the bare name."
    )
    assert outcome.target_provider == "opencode-go", failure_hint
    assert outcome.new_model == "deepseek-v4-flash"
def test_deepseek_v4_pro_stays_on_opencode_go():
    """The pro variant must also remain on opencode-go (same bug class as flash)."""
    outcome = _run_switch("deepseek-v4-pro")
    provider, model = outcome.target_provider, outcome.new_model
    assert provider == "opencode-go"
    assert model == "deepseek-v4-pro"
def test_kimi_k2_6_stays_on_opencode_go():
    """Regression guard for a path that already worked: kimi-k2.6 stays on opencode-go."""
    outcome = _run_switch("kimi-k2.6", current_model="deepseek-v4-pro")
    provider, model = outcome.target_provider, outcome.new_model
    assert provider == "opencode-go"
    assert model == "kimi-k2.6"

View file

@ -0,0 +1,75 @@
"""Tests for `_pin_kanban_board_env` helper invoked by `cmd_chat`.
Regression coverage for #20074: a chat session must export the active kanban
board into `HERMES_KANBAN_BOARD` at boot so subprocess shell-outs (e.g.
`hermes kanban ...`) inherit the same board the in-process kanban tools resolve.
Without this, a concurrent `hermes kanban boards switch` from another session
can flip the global current-board file mid-turn and silently divert the
shell calls to a different DB.
"""
import importlib
import os
import pytest
@pytest.fixture(autouse=True)
def _isolate_kanban_board_env():
    """Run each test with `HERMES_KANBAN_BOARD` unset, then restore it.

    `_pin_kanban_board_env()` writes to ``os.environ`` directly, bypassing
    any ``monkeypatch.setenv`` tracking. Without this fixture the mutation
    leaks into subsequent tests and breaks anything that resolves a kanban
    path from the env (e.g. ``TestSharedBoardPaths`` in test_kanban_db.py).
    """
    env_key = "HERMES_KANBAN_BOARD"
    # Remove the variable for the test and remember what it held, if anything.
    saved = os.environ.pop(env_key, None)
    try:
        yield
    finally:
        # Drop whatever the test left behind, then put the old value back.
        os.environ.pop(env_key, None)
        if saved is not None:
            os.environ[env_key] = saved
def test_pin_writes_resolved_board_when_env_unset(monkeypatch):
    """The pin helper exports the board returned by ``get_current_board``."""
    main_mod = importlib.import_module("hermes_cli.main")
    import hermes_cli.kanban_db as kdb

    def _current_board():
        return "space"

    monkeypatch.setattr(kdb, "get_current_board", _current_board)
    main_mod._pin_kanban_board_env()

    exported = main_mod.os.environ.get("HERMES_KANBAN_BOARD")
    assert exported == "space"
def test_pin_does_not_overwrite_existing_env(monkeypatch):
    """A pre-set `HERMES_KANBAN_BOARD` is kept and no board lookup happens."""
    monkeypatch.setenv("HERMES_KANBAN_BOARD", "preset")
    main_mod = importlib.import_module("hermes_cli.main")
    import hermes_cli.kanban_db as kdb

    def _must_not_run():
        # Any call here means the helper ignored the existing env value.
        raise AssertionError("get_current_board must not be called when env is set")

    monkeypatch.setattr(kdb, "get_current_board", _must_not_run)
    main_mod._pin_kanban_board_env()

    exported = main_mod.os.environ.get("HERMES_KANBAN_BOARD")
    assert exported == "preset"
def test_pin_swallows_resolution_failures(monkeypatch):
    """If board resolution raises, the helper returns quietly and exports nothing."""
    main_mod = importlib.import_module("hermes_cli.main")
    import hermes_cli.kanban_db as kdb

    def _failing_lookup():
        raise RuntimeError("disk gone")

    monkeypatch.setattr(kdb, "get_current_board", _failing_lookup)
    main_mod._pin_kanban_board_env()

    env_after = main_mod.os.environ
    assert "HERMES_KANBAN_BOARD" not in env_after

View file

@ -21,6 +21,7 @@ from hermes_cli.plugins import (
get_plugin_command_handler,
get_plugin_commands,
get_pre_tool_call_block_message,
resolve_plugin_command_result,
discover_plugins,
invoke_hook,
)
@ -329,6 +330,7 @@ class TestPluginHooks:
assert "post_api_request" in VALID_HOOKS
assert "transform_terminal_output" in VALID_HOOKS
assert "transform_tool_result" in VALID_HOOKS
assert "transform_llm_output" in VALID_HOOKS
def test_valid_hooks_include_pre_gateway_dispatch(self):
assert "pre_gateway_dispatch" in VALID_HOOKS
@ -1061,6 +1063,45 @@ class TestPluginCommands:
assert mgr._plugin_commands["cmd-b"]["plugin"] == "plugin-b"
class TestPluginCommandResultResolution:
    """``resolve_plugin_command_result`` with plain values and coroutines."""

    def test_returns_sync_values_unchanged(self):
        """A non-awaitable value is handed back as-is."""
        resolved = resolve_plugin_command_result("ok")
        assert resolved == "ok"

    def test_awaits_async_result_without_running_loop(self):
        """A coroutine is driven to completion and its result returned."""

        async def _coro():
            return "async-ok"

        resolved = resolve_plugin_command_result(_coro())
        assert resolved == "async-ok"

    def test_awaits_async_result_with_running_loop(self, monkeypatch):
        """Still resolves when ``get_running_loop`` reports a loop object."""

        class _Loop:
            pass

        async def _coro():
            return "threaded-ok"

        def _fake_running_loop():
            return _Loop()

        monkeypatch.setattr(
            "hermes_cli.plugins.asyncio.get_running_loop", _fake_running_loop
        )
        resolved = resolve_plugin_command_result(_coro())
        assert resolved == "threaded-ok"

    def test_running_loop_timeout_does_not_hang_forever(self, monkeypatch):
        """Threaded path must abort a hung async handler instead of blocking the caller."""
        import asyncio as _asyncio
        import pytest

        class _Loop:
            pass

        async def _stalled():
            await _asyncio.sleep(10)
            return "should-not-reach"

        def _fake_running_loop():
            return _Loop()

        monkeypatch.setattr(
            "hermes_cli.plugins.asyncio.get_running_loop", _fake_running_loop
        )
        # Shrink the await timeout so the 10-second sleep is cut off quickly.
        monkeypatch.setattr(
            "hermes_cli.plugins._PLUGIN_COMMAND_AWAIT_TIMEOUT_SECS", 0.1
        )

        with pytest.raises(TimeoutError):
            resolve_plugin_command_result(_stalled())
# ── TestPluginDispatchTool ────────────────────────────────────────────────
@ -1191,3 +1232,77 @@ class TestPluginDispatchTool:
result = ctx.dispatch_tool("fake", {})
assert '"error"' in result
class TestPluginDebugLogging:
    """HERMES_PLUGINS_DEBUG opt-in stderr handler for plugin developers."""

    def test_debug_handler_not_installed_when_env_var_absent(self, monkeypatch):
        """Without the env var, no stderr handler is attached."""
        monkeypatch.delenv("HERMES_PLUGINS_DEBUG", raising=False)
        from hermes_cli import plugins as plugins_mod

        # Snapshot module state so the forced re-evaluation can be undone.
        saved_installed = plugins_mod._DEBUG_HANDLER_INSTALLED
        saved_debug = plugins_mod._PLUGINS_DEBUG
        saved_handlers = list(plugins_mod.logger.handlers)
        try:
            plugins_mod._DEBUG_HANDLER_INSTALLED = False
            plugins_mod._install_plugin_debug_handler(force=True)

            assert plugins_mod._PLUGINS_DEBUG is False
            assert plugins_mod._DEBUG_HANDLER_INSTALLED is False
            # No new stderr handler was attached.
            assert plugins_mod.logger.handlers == saved_handlers
        finally:
            plugins_mod._DEBUG_HANDLER_INSTALLED = saved_installed
            plugins_mod._PLUGINS_DEBUG = saved_debug
            plugins_mod.logger.handlers = saved_handlers

    def test_debug_handler_installed_when_env_var_set(self, monkeypatch):
        """With HERMES_PLUGINS_DEBUG=1, a DEBUG-level stderr handler is attached."""
        monkeypatch.setenv("HERMES_PLUGINS_DEBUG", "1")
        from hermes_cli import plugins as plugins_mod

        saved_installed = plugins_mod._DEBUG_HANDLER_INSTALLED
        saved_debug = plugins_mod._PLUGINS_DEBUG
        saved_level = plugins_mod.logger.level
        saved_handlers = list(plugins_mod.logger.handlers)
        try:
            plugins_mod._DEBUG_HANDLER_INSTALLED = False
            plugins_mod._install_plugin_debug_handler(force=True)

            assert plugins_mod._PLUGINS_DEBUG is True
            assert plugins_mod._DEBUG_HANDLER_INSTALLED is True
            assert plugins_mod.logger.level == logging.DEBUG

            # Exactly one handler should have appeared since the snapshot.
            added = [
                handler
                for handler in plugins_mod.logger.handlers
                if handler not in saved_handlers
            ]
            assert len(added) == 1
            assert isinstance(added[0], logging.StreamHandler)
            assert added[0].level == logging.DEBUG
        finally:
            plugins_mod._DEBUG_HANDLER_INSTALLED = saved_installed
            plugins_mod._PLUGINS_DEBUG = saved_debug
            plugins_mod.logger.setLevel(saved_level)
            plugins_mod.logger.handlers = saved_handlers

    def test_debug_handler_idempotent(self, monkeypatch):
        """Calling install twice (without force) does not double-attach."""
        monkeypatch.setenv("HERMES_PLUGINS_DEBUG", "1")
        from hermes_cli import plugins as plugins_mod

        saved_installed = plugins_mod._DEBUG_HANDLER_INSTALLED
        saved_debug = plugins_mod._PLUGINS_DEBUG
        saved_level = plugins_mod.logger.level
        saved_handlers = list(plugins_mod.logger.handlers)
        try:
            plugins_mod._DEBUG_HANDLER_INSTALLED = False

            plugins_mod._install_plugin_debug_handler(force=True)
            handlers_after_forced = len(plugins_mod.logger.handlers)

            plugins_mod._install_plugin_debug_handler()  # no force
            handlers_after_repeat = len(plugins_mod.logger.handlers)

            assert handlers_after_forced == handlers_after_repeat
        finally:
            plugins_mod._DEBUG_HANDLER_INSTALLED = saved_installed
            plugins_mod._PLUGINS_DEBUG = saved_debug
            plugins_mod.logger.setLevel(saved_level)
            plugins_mod.logger.handlers = saved_handlers

View file

@ -12,9 +12,11 @@ import pytest
import yaml
from hermes_cli.plugins_cmd import (
PluginOperationError,
_copy_example_files,
_read_manifest,
_repo_name_from_url,
_resolve_git_executable,
_resolve_git_url,
_sanitize_plugin_name,
plugins_command,
@ -99,6 +101,69 @@ class TestResolveGitUrl:
_resolve_git_url("a/b/c")
# ── _resolve_git_executable ─────────────────────────────────────────────────
class TestResolveGitExecutable:
    """Fallback resolution when bare ``git`` is not discoverable via ``PATH``."""

    def teardown_method(self):
        # The resolver is cached; clear it so results never leak between tests.
        _resolve_git_executable.cache_clear()

    def test_prefers_shutil_which(self):
        """Whatever ``shutil.which`` finds is returned."""
        import hermes_cli.plugins_cmd as pc

        _resolve_git_executable.cache_clear()
        which_patch = patch.object(pc.shutil, "which", return_value="/usr/local/bin/git")
        with which_patch:
            resolved = pc._resolve_git_executable()
            assert resolved == "/usr/local/bin/git"

    def test_fallback_posix_first_matching_path(self):
        """With ``which`` empty on posix, an existing well-known path is used."""
        import hermes_cli.plugins_cmd as pc

        _resolve_git_executable.cache_clear()

        def _only_usr_local(candidate: str) -> bool:
            return candidate == "/usr/local/bin/git"

        with patch.object(pc.shutil, "which", return_value=None), \
                patch.object(pc.os, "name", "posix"), \
                patch.object(pc.os.path, "isfile", side_effect=_only_usr_local):
            resolved = pc._resolve_git_executable()
            assert resolved == "/usr/local/bin/git"

    def test_returns_none_when_unavailable(self):
        """No ``which`` hit and no existing candidate file resolves to ``None``."""
        import hermes_cli.plugins_cmd as pc

        _resolve_git_executable.cache_clear()
        with patch.object(pc.shutil, "which", return_value=None), \
                patch.object(pc.os, "name", "posix"), \
                patch.object(pc.os.path, "isfile", return_value=False):
            resolved = pc._resolve_git_executable()
            assert resolved is None

    def test_git_pull_uses_resolved_executable(self, tmp_path):
        """``_git_pull_plugin_dir`` runs the resolved git path as argv[0]."""
        import hermes_cli.plugins_cmd as pc

        _resolve_git_executable.cache_clear()
        resolver_patch = patch.object(
            pc,
            "_resolve_git_executable",
            return_value="/resolved/git",
        )
        with resolver_patch, patch.object(pc.subprocess, "run") as run:
            run.return_value = MagicMock(
                returncode=0, stdout="Already up to date\n", stderr=""
            )
            ok, _msg = pc._git_pull_plugin_dir(tmp_path)

            assert ok is True
            run.assert_called_once()
            # First positional argument of run() is the argv list.
            argv = run.call_args[0][0]
            assert argv[0] == "/resolved/git"

    def test_install_core_raises_when_git_unresolved(self):
        """Installing without a resolvable git raises ``PluginOperationError``."""
        import hermes_cli.plugins_cmd as pc

        _resolve_git_executable.cache_clear()
        resolver_patch = patch.object(pc, "_resolve_git_executable", return_value=None)
        with resolver_patch:
            with pytest.raises(PluginOperationError, match="git is not installed"):
                pc._install_plugin_core("owner/repo", force=True)
# ── _repo_name_from_url ──────────────────────────────────────────────────
@ -508,7 +573,7 @@ class TestPromptPluginEnvVars:
class TestCursesRadiolist:
"""Test the curses_radiolist function (non-TTY fallback path)."""
"""Test the curses_radiolist function."""
def test_non_tty_returns_default(self):
from hermes_cli.curses_ui import curses_radiolist
@ -524,6 +589,14 @@ class TestCursesRadiolist:
result = curses_radiolist("Pick", ["x", "y"], selected=0, cancel_returns=1)
assert result == 1
def test_keyboard_interrupt_returns_cancel_value(self):
    """A KeyboardInterrupt from ``curses.wrapper`` yields ``cancel_returns``."""
    from hermes_cli.curses_ui import curses_radiolist

    stdin_patch = patch("sys.stdin")
    wrapper_patch = patch("curses.wrapper", side_effect=KeyboardInterrupt)
    with stdin_patch as mock_stdin, wrapper_patch:
        # Report a TTY so the curses path is taken.
        mock_stdin.isatty.return_value = True
        chosen = curses_radiolist("Pick", ["x", "y"], selected=0, cancel_returns=-1)
    assert chosen == -1
# ── Provider discovery helpers ───────────────────────────────────────────

View file

@ -0,0 +1,71 @@
"""Tests for the post_setup install-state gate in `_toolset_needs_configuration_prompt`.
Regression coverage for the cua-driver silent-no-op bug (issue #22737).
When a no-key provider's only install side-effect is a `post_setup` hook
(cua-driver, etc.), the gate function used to fall through to the
`_toolset_has_keys` catch-all, which returned True for any provider with
empty `env_vars`, causing `hermes tools` to write the toolset to config
and exit with `Saved` without ever invoking the post_setup install. These
tests pin the new predicate-aware behaviour so the regression doesn't
sneak back in.
"""
from __future__ import annotations
class TestPostSetupGate:
    """Install-state gate for toolsets whose setup is a `post_setup` hook."""

    def test_cua_driver_missing_forces_setup(self, monkeypatch, tmp_path):
        """When cua-driver isn't on PATH, the gate must return True so the
        provider-setup flow runs and triggers `_run_post_setup`."""
        from hermes_cli import tools_config

        def _nothing_on_path(name):
            return None

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setattr(tools_config.shutil, "which", _nothing_on_path)

        needs_prompt = tools_config._toolset_needs_configuration_prompt(
            "computer_use", {}
        )
        assert needs_prompt is True

    def test_cua_driver_installed_skips_setup(self, monkeypatch, tmp_path):
        """When cua-driver is already on PATH, the gate must return False
        so a re-save through `hermes tools` doesn't re-prompt the user."""
        from hermes_cli import tools_config

        def _only_cua_driver(name):
            if name == "cua-driver":
                return "/usr/local/bin/cua-driver"
            return None

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        monkeypatch.setattr(tools_config.shutil, "which", _only_cua_driver)

        needs_prompt = tools_config._toolset_needs_configuration_prompt(
            "computer_use", {}
        )
        assert needs_prompt is False

    def test_post_setup_predicate_exception_does_not_block(self, monkeypatch):
        """A predicate that raises must be treated as 'satisfied' so a
        broken check can't strand the user in an infinite setup loop."""
        from hermes_cli import tools_config

        def _broken_predicate():
            raise RuntimeError("predicate broken")

        monkeypatch.setitem(
            tools_config._POST_SETUP_INSTALLED, "cua_driver", _broken_predicate
        )
        installed = tools_config._post_setup_already_installed("cua_driver")
        assert installed is True

    def test_unregistered_post_setup_treated_as_satisfied(self):
        """post_setup keys without a registered predicate must default to
        'satisfied' so we don't change behaviour for hooks we haven't
        explicitly opted in (kittentts, piper, agent_browser, etc.)."""
        from hermes_cli import tools_config

        installed = tools_config._post_setup_already_installed("does_not_exist")
        assert installed is True

    def test_cua_driver_predicate_registered(self):
        """Keep an explicit pin on the cua_driver entry so accidental
        deletion of the registry row would fail this test rather than
        silently restore the original silent-no-op bug."""
        from hermes_cli import tools_config

        registry = tools_config._POST_SETUP_INSTALLED
        assert "cua_driver" in registry

View file

@ -0,0 +1,584 @@
"""Tests for hermes_cli.profile_distribution — git-based profile installs.
Covers manifest parsing, version requirement checks, install / update / describe
on local-directory sources, and guards on what can and can't be installed.
Transport-layer tests (git clone, URL handling) are exercised through live
E2E runs, not unit tests: git itself is tested upstream, and subprocess-
mocking git would just test the mock.
"""
from __future__ import annotations
import os
from pathlib import Path
import pytest
from hermes_cli.profile_distribution import (
DEFAULT_DIST_OWNED,
DistributionError,
DistributionManifest,
EnvRequirement,
MANIFEST_FILENAME,
USER_OWNED_EXCLUDE,
_env_template_from_manifest,
_looks_like_git_url,
_parse_semver,
check_hermes_requires,
describe_distribution,
install_distribution,
plan_install,
read_manifest,
update_distribution,
write_manifest,
)
# ---------------------------------------------------------------------------
# Isolated profile env (matches tests/hermes_cli/test_profiles.py)
# ---------------------------------------------------------------------------
@pytest.fixture()
def profile_env(tmp_path, monkeypatch):
    """Point ``Path.home()`` and ``HERMES_HOME`` at a throwaway directory.

    Creates ``<tmp_path>/.hermes``, exports it as ``HERMES_HOME``, and
    returns ``tmp_path`` (the fake home directory).
    """

    def _fake_home():
        return tmp_path

    monkeypatch.setattr(Path, "home", _fake_home)

    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir(exist_ok=True)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    return tmp_path
def _make_staging_dir(
    root: Path,
    name: str = "src",
    *,
    manifest: DistributionManifest | None = None,
) -> Path:
    """Build a local distribution staging directory (what a git clone would
    contain after .git is removed).

    Lays down a minimal but representative tree: SOUL.md, config.yaml,
    mcp.json, one skill, one cron file, plus the distribution.yaml manifest.

    Args:
        root: Directory under which ``staging_<name>`` is created.
        name: Suffix for the staging directory; also the manifest name when
            ``manifest`` is not supplied.
        manifest: Manifest to write. When omitted, a default manifest named
            ``name`` at version ``0.1.0`` is written instead.

    Returns:
        Path to the populated staging directory.
    """
    staged = root / f"staging_{name}"
    staged.mkdir(parents=True, exist_ok=True)

    # Top-level profile files.
    (staged / "SOUL.md").write_text("I am Source.\n")
    (staged / "config.yaml").write_text("model:\n model: gpt-4\n")
    (staged / "mcp.json").write_text('{"servers": {}}\n')

    # One demo skill.
    (staged / "skills").mkdir(exist_ok=True)
    (staged / "skills" / "demo").mkdir(exist_ok=True)
    (staged / "skills" / "demo" / "SKILL.md").write_text(
        "---\nname: demo\ndescription: test\n---\n# Demo skill\n"
    )

    # One cron entry.
    (staged / "cron").mkdir(exist_ok=True)
    (staged / "cron" / "daily.json").write_text('{"schedule": "0 9 * * *"}')

    mf = manifest or DistributionManifest(name=name, version="0.1.0")
    write_manifest(staged, mf)
    return staged
# ===========================================================================
# Manifest parsing
# ===========================================================================
class TestManifestParsing:
    """Reading, defaulting, validating and round-tripping distribution manifests."""

    def test_minimal_manifest(self, tmp_path):
        """A manifest with only ``name`` gets defaults for everything else."""
        manifest_path = tmp_path / MANIFEST_FILENAME
        manifest_path.write_text("name: minimal\n")

        parsed = read_manifest(tmp_path)

        assert parsed.name == "minimal"
        assert parsed.version == "0.1.0"
        assert parsed.env_requires == []
        assert parsed.distribution_owned == []

    def test_full_manifest(self, tmp_path):
        """Every supported field is parsed, including env requirements."""
        manifest_path = tmp_path / MANIFEST_FILENAME
        manifest_path.write_text(
            "name: telem\n"
            "version: 1.2.3\n"
            "description: Telem monitor\n"
            "hermes_requires: '>=0.12.0'\n"
            "author: Kyle\n"
            "license: MIT\n"
            "env_requires:\n"
            " - name: OPENAI_API_KEY\n"
            " description: OpenAI key\n"
            " - name: GRAPH_URL\n"
            " required: false\n"
            " default: http://127.0.0.1:8000\n"
            "distribution_owned:\n"
            " - SOUL.md\n"
            " - skills/\n"
        )

        parsed = read_manifest(tmp_path)

        assert parsed.name == "telem"
        assert parsed.version == "1.2.3"
        assert parsed.author == "Kyle"
        assert parsed.license == "MIT"

        assert len(parsed.env_requires) == 2
        first_env, second_env = parsed.env_requires[0], parsed.env_requires[1]
        assert first_env.name == "OPENAI_API_KEY"
        assert first_env.required is True
        assert second_env.required is False
        assert second_env.default == "http://127.0.0.1:8000"

        assert parsed.distribution_owned == ["SOUL.md", "skills"]

    def test_missing_name_rejected(self, tmp_path):
        """A manifest without ``name`` raises ``DistributionError``."""
        manifest_path = tmp_path / MANIFEST_FILENAME
        manifest_path.write_text("version: 1.0\n")
        with pytest.raises(DistributionError, match="missing 'name'"):
            read_manifest(tmp_path)

    def test_env_requires_not_list_rejected(self, tmp_path):
        """``env_requires`` given as a mapping instead of a list is rejected."""
        manifest_path = tmp_path / MANIFEST_FILENAME
        manifest_path.write_text(
            "name: bad\nenv_requires:\n name: FOO\n"
        )
        with pytest.raises(DistributionError, match="env_requires must be a list"):
            read_manifest(tmp_path)

    def test_read_manifest_returns_none_when_absent(self, tmp_path):
        """A directory with no manifest file yields ``None``."""
        parsed = read_manifest(tmp_path)
        assert parsed is None

    def test_owned_paths_default(self):
        """Without an explicit list, ``owned_paths`` falls back to the defaults."""
        manifest = DistributionManifest(name="x")
        assert manifest.owned_paths() == list(DEFAULT_DIST_OWNED)

    def test_owned_paths_explicit(self):
        """An explicit ``distribution_owned`` list is returned unchanged."""
        manifest = DistributionManifest(
            name="x", distribution_owned=["SOUL.md", "skills"]
        )
        assert manifest.owned_paths() == ["SOUL.md", "skills"]

    def test_roundtrip_write_read(self, tmp_path):
        """A manifest written to disk reads back with name and env entries intact."""
        written = DistributionManifest(
            name="rt",
            version="1.0.0",
            description="roundtrip",
            env_requires=[EnvRequirement(name="FOO", description="foo")],
        )
        write_manifest(tmp_path, written)

        reread = read_manifest(tmp_path)

        assert reread.name == "rt"
        assert reread.env_requires[0].name == "FOO"
# ===========================================================================
# Version requirement checks
# ===========================================================================
class TestVersionRequires:
    """``check_hermes_requires`` specifier handling and ``_parse_semver`` parsing."""

    @pytest.mark.parametrize("spec,cur,ok", [
        ("", "0.1.0", True),
        (">=0.12.0", "0.12.0", True),
        (">=0.12.0", "0.13.0", True),
        (">=0.12.0", "0.11.9", False),
        ("==0.12.0", "0.12.0", True),
        ("==0.12.0", "0.13.0", False),
        ("!=0.12.0", "0.13.0", True),
        (">0.12.0", "0.12.1", True),
        (">0.12.0", "0.12.0", False),
        ("<0.13.0", "0.12.9", True),
        ("<=0.12.0", "0.12.0", True),
        ("0.12.0", "0.13.0", True),  # Bare = >=
        ("0.12.0", "0.11.0", False),  # Bare = >=
    ])
    def test_check_matrix(self, spec, cur, ok):
        """Satisfied specs return quietly; unsatisfied ones raise."""
        if not ok:
            with pytest.raises(DistributionError, match="requires Hermes"):
                check_hermes_requires(spec, cur)
            return
        check_hermes_requires(spec, cur)

    def test_parse_semver_handles_prerelease(self):
        """Pre-release and build suffixes (and a leading ``v``) are ignored."""
        expected = (0, 12, 0)
        assert _parse_semver("0.12.0-rc1") == expected
        assert _parse_semver("v0.12.0+abc") == expected

    def test_parse_semver_pads(self):
        """Missing minor/patch components are filled in with zeros."""
        assert _parse_semver("1") == (1, 0, 0)
        assert _parse_semver("1.2") == (1, 2, 0)

    def test_parse_semver_rejects_garbage(self):
        """A string with no version in it raises ``DistributionError``."""
        with pytest.raises(DistributionError, match="Unparseable"):
            _parse_semver("not-a-version")
# ===========================================================================
# Env template
# ===========================================================================
class TestEnvTemplate:
    """Rendering of the ``.env`` template generated from a manifest."""

    def test_required_is_uncommented(self):
        """A required variable is emitted as a live ``NAME=`` assignment line."""
        m = DistributionManifest(
            name="x",
            env_requires=[EnvRequirement(name="FOO", description="foo key")],
        )
        out = _env_template_from_manifest(m)
        assert "# foo key" in out
        assert "# (required)" in out
        # The assignment must start its own line. A plain substring check such
        # as ``"FOO=\n" in out`` would also accept the commented-out form
        # ``# FOO=``, which is exactly what this test is meant to rule out.
        assert any(line.startswith("FOO=") for line in out.splitlines())

    def test_optional_is_commented(self):
        """An optional variable appears commented out, with its default value."""
        m = DistributionManifest(
            name="x",
            env_requires=[EnvRequirement(name="BAR", required=False, default="http://x")],
        )
        out = _env_template_from_manifest(m)
        assert "# (optional)" in out
        assert "# BAR=http://x" in out

    def test_empty_env_requires_is_header_only(self):
        """With no env requirements the output still has the header, and no variables."""
        m = DistributionManifest(name="x")
        out = _env_template_from_manifest(m)
        assert "Hermes distribution" in out
        assert "FOO" not in out
# ===========================================================================
# Source URL detection
# ===========================================================================
class TestLooksLikeGitUrl:
    """Classification of install sources as git URLs versus everything else."""

    _GIT_SOURCES = [
        "github.com/user/repo",
        "https://github.com/user/repo",
        "https://github.com/user/repo.git",
        "http://example.com/repo",
        "git@github.com:user/repo.git",
        "ssh://git@example.com/repo.git",
        "git://example.com/repo.git",
    ]

    _NON_GIT_SOURCES = [
        "/tmp/local/path",
        "./relative/dir",
        "~/profile",
        "some-random-string",
    ]

    @pytest.mark.parametrize("src", _GIT_SOURCES)
    def test_accepts_git_sources(self, src):
        """Each git-style source is recognised."""
        verdict = _looks_like_git_url(src)
        assert verdict

    @pytest.mark.parametrize("src", _NON_GIT_SOURCES)
    def test_rejects_non_git(self, src):
        """Local paths and arbitrary strings are not treated as git URLs."""
        verdict = _looks_like_git_url(src)
        assert not verdict
# ===========================================================================
# Install — fresh and force (from a local-directory source)
# ===========================================================================
class TestInstall:
    """Fresh and forced installs from a local-directory source."""

    def test_install_from_directory(self, profile_env):
        """Installing copies the staged files and records name and source."""
        source_dir = _make_staging_dir(profile_env, "src")
        plan = install_distribution(str(source_dir), name="installed")
        target = plan.target_dir

        assert target.is_dir()
        assert (target / "SOUL.md").read_text() == "I am Source.\n"
        assert (target / "skills" / "demo" / "SKILL.md").exists()
        assert (target / "mcp.json").exists()

        # Manifest on disk records canonical name + provenance
        installed_manifest = read_manifest(target)
        assert installed_manifest.name == "installed"
        assert installed_manifest.source == str(source_dir)

    def test_install_uses_manifest_name_when_no_override(self, profile_env):
        """Without ``name=``, the manifest's own name picks the target profile."""
        source_manifest = DistributionManifest(name="telem", version="1.0.0")
        source_dir = _make_staging_dir(profile_env, "telem", manifest=source_manifest)

        plan = install_distribution(str(source_dir))

        assert plan.manifest.name == "telem"
        assert plan.target_dir.name == "telem"

    def test_install_rejects_existing_without_force(self, profile_env):
        """A second install onto the same name fails unless forced."""
        source = str(_make_staging_dir(profile_env, "src"))
        install_distribution(source, name="existing")
        with pytest.raises(DistributionError, match="already exists"):
            install_distribution(source, name="existing")

    def test_install_with_force_overwrites(self, profile_env):
        """``force=True`` lets an install replace an existing profile."""
        source = str(_make_staging_dir(profile_env, "src"))
        install_distribution(source, name="target")

        # Install again with --force succeeds
        plan = install_distribution(source, name="target", force=True)
        assert plan.target_dir.is_dir()

    def test_install_rejects_default_name(self, profile_env):
        """The name ``default`` cannot be used as an install target."""
        source = str(_make_staging_dir(profile_env, "src"))
        with pytest.raises(DistributionError, match="Cannot install"):
            install_distribution(source, name="default")

    def test_install_rejects_non_distribution_directory(self, profile_env, tmp_path):
        """A directory without distribution.yaml is refused at planning time."""
        not_a_dist = tmp_path / "bogus_dir"
        not_a_dist.mkdir()
        (not_a_dist / "some_file").write_text("hi")

        work_dir = tmp_path / "work"
        with pytest.raises(DistributionError, match="No distribution.yaml"):
            plan_install(str(not_a_dist), work_dir, override_name="x")

    def test_install_rejects_unknown_source(self, profile_env, tmp_path):
        """A source that is neither a directory nor a git URL cannot be resolved."""
        work_dir = tmp_path / "work"
        with pytest.raises(DistributionError, match="Cannot resolve"):
            plan_install("definitely-not-a-thing", work_dir, override_name="x")

    def test_install_emits_env_example_when_manifest_has_env(self, profile_env):
        """Manifests with env requirements produce a ``.env.EXAMPLE`` file."""
        source_manifest = DistributionManifest(
            name="needs_env",
            version="0.1.0",
            env_requires=[EnvRequirement(name="OPENAI_API_KEY", description="key")],
        )
        source_dir = _make_staging_dir(
            profile_env, "needs_env", manifest=source_manifest
        )

        plan = install_distribution(str(source_dir), name="needs_env")

        env_example = plan.target_dir / ".env.EXAMPLE"
        assert env_example.is_file()
        assert "OPENAI_API_KEY" in env_example.read_text()

    def test_install_enforces_hermes_requires(self, profile_env, monkeypatch):
        """An unsatisfied ``hermes_requires`` aborts the install."""
        # Pin current Hermes version to something well below the requirement
        import hermes_cli

        monkeypatch.setattr(hermes_cli, "__version__", "0.1.0", raising=False)
        source_manifest = DistributionManifest(
            name="future",
            version="1.0.0",
            hermes_requires=">=99.0.0",
        )
        source_dir = _make_staging_dir(profile_env, "future", manifest=source_manifest)

        with pytest.raises(DistributionError, match="requires Hermes"):
            install_distribution(str(source_dir), name="future")
# ===========================================================================
# Update — preserves user data, preserves config by default
# ===========================================================================
class TestUpdate:
    """Updates refresh distribution files while leaving user data and config alone."""

    def test_update_preserves_user_data(self, profile_env):
        """Distribution-owned files change on update; user-owned files do not."""
        # 1. Build staging dir, install
        source_dir = _make_staging_dir(profile_env, "src")
        profile_dir = install_distribution(str(source_dir), name="telem").target_dir

        # 2. Add user-owned data to the installed profile
        memory_file = profile_dir / "memories" / "MEMORY.md"
        env_file = profile_dir / ".env"
        auth_file = profile_dir / "auth.json"
        session_file = profile_dir / "sessions" / "chat.json"

        (profile_dir / "memories").mkdir(exist_ok=True)
        memory_file.write_text("# USER MEMORY\n")
        env_file.write_text("OPENAI_API_KEY=sk-user\n")
        auth_file.write_text('{"user": "auth"}')
        (profile_dir / "sessions").mkdir(exist_ok=True)
        session_file.write_text('{"s": 1}')

        # 3. Bump source in the staging dir
        (source_dir / "SOUL.md").write_text("I am Source v2.\n")

        # 4. Update
        update_distribution("telem", force_config=False)

        # 5. Dist-owned changed
        assert (profile_dir / "SOUL.md").read_text() == "I am Source v2.\n"

        # 6. User-owned preserved
        assert memory_file.read_text() == "# USER MEMORY\n"
        assert env_file.read_text() == "OPENAI_API_KEY=sk-user\n"
        assert auth_file.read_text() == '{"user": "auth"}'
        assert session_file.read_text() == '{"s": 1}'

    def test_update_preserves_config_by_default(self, profile_env):
        """Without ``force_config``, a user-edited config.yaml survives an update."""
        source_dir = _make_staging_dir(profile_env, "src")
        profile_dir = install_distribution(str(source_dir), name="t2").target_dir
        installed_config = profile_dir / "config.yaml"

        # User edits config
        installed_config.write_text(
            "model:\n model: gpt-5\n# user override\n"
        )
        # Bump source config
        (source_dir / "config.yaml").write_text("model:\n model: claude\n")

        update_distribution("t2", force_config=False)

        assert "gpt-5" in installed_config.read_text()
        assert "user override" in installed_config.read_text()

    def test_update_force_config_overwrites(self, profile_env):
        """``force_config=True`` replaces config.yaml with the source version."""
        source_dir = _make_staging_dir(profile_env, "src")
        profile_dir = install_distribution(str(source_dir), name="t3").target_dir
        installed_config = profile_dir / "config.yaml"

        installed_config.write_text("model:\n model: gpt-5\n")
        (source_dir / "config.yaml").write_text("model:\n model: claude\n")

        update_distribution("t3", force_config=True)

        assert "claude" in installed_config.read_text()
        assert "gpt-5" not in installed_config.read_text()

    def test_update_missing_manifest_errors(self, profile_env):
        """Updating a profile that has no manifest is refused."""
        # Make a profile without a manifest; update must refuse
        from hermes_cli.profiles import create_profile

        create_profile(name="plain", no_alias=True)
        with pytest.raises(DistributionError, match="not a distribution"):
            update_distribution("plain")
# ===========================================================================
# describe_distribution — info subcommand
# ===========================================================================
class TestDescribe:
    """``describe_distribution``, the data behind the info subcommand."""

    def test_describe_existing_distribution(self, profile_env):
        """An installed distribution reports its name, version and env needs."""
        source_manifest = DistributionManifest(
            name="telem",
            version="1.0.0",
            description="compliance monitor",
            env_requires=[EnvRequirement(name="API", description="api key")],
        )
        source_dir = _make_staging_dir(profile_env, "telem", manifest=source_manifest)
        install_distribution(str(source_dir), name="telem")

        info = describe_distribution("telem")

        assert info["name"] == "telem"
        assert info["version"] == "1.0.0"
        first_env = info["env_requires"][0]
        assert first_env["name"] == "API"

    def test_describe_non_distribution_returns_empty(self, profile_env):
        """A plain profile (no manifest) describes as an empty dict."""
        from hermes_cli.profiles import create_profile

        create_profile(name="plain", no_alias=True)
        info = describe_distribution("plain")
        assert info == {}

    def test_describe_missing_profile_raises(self, profile_env):
        """Describing a profile that does not exist raises ``DistributionError``."""
        with pytest.raises(DistributionError, match="does not exist"):
            describe_distribution("nonexistent")
# ===========================================================================
# Security — USER_OWNED_EXCLUDE covers the right paths
# ===========================================================================
class TestSecurity:
    """Credentials and user data must never be taken from a distribution."""

    def test_user_owned_exclude_covers_credentials(self):
        """The exclusion list names every credential and user-data path."""
        must_be_excluded = ("auth.json", ".env", "memories", "sessions", "local")
        for entry in must_be_excluded:
            assert entry in USER_OWNED_EXCLUDE

    def test_install_does_not_import_credentials_from_staging(self, profile_env):
        """If an author accidentally ships auth.json or .env in their
        staging dir, the installer must NOT copy them to the target profile."""
        source_dir = _make_staging_dir(profile_env, "src")
        # Author leaks credentials into the staging tree (shouldn't happen, but...)
        (source_dir / "auth.json").write_text('{"leaked": true}')
        (source_dir / ".env").write_text("LEAKED=1")

        profile_dir = install_distribution(str(source_dir), name="clean").target_dir

        assert not (profile_dir / "auth.json").exists(), "auth.json leaked"
        # Fresh profile may have its own .env via the bootstrap; what we care
        # about is that the leaked content didn't land in the target.
        installed_env = profile_dir / ".env"
        if installed_env.exists():
            assert "LEAKED" not in installed_env.read_text()
# ===========================================================================
# Install-time metadata (installed_at stamp)
# ===========================================================================
class TestInstalledAtStamp:
    """The ``installed_at`` timestamp recorded in the installed manifest."""

    def test_install_stamps_installed_at(self, profile_env):
        """A fresh install records an ISO-8601 UTC timestamp."""
        source_dir = _make_staging_dir(profile_env, "src")
        plan = install_distribution(str(source_dir), name="stamped")

        stamp = read_manifest(plan.target_dir).installed_at

        assert stamp, "installed_at should be set after install"
        # ISO-8601 UTC sanity: starts with 4-digit year, contains 'T', ends with '+00:00'.
        assert stamp[:4].isdigit()
        assert "T" in stamp
        assert stamp.endswith("+00:00")

    def test_update_refreshes_installed_at(self, profile_env, monkeypatch):
        """An update writes a new ``installed_at`` value."""
        source_dir = _make_staging_dir(profile_env, "src")
        install_distribution(str(source_dir), name="demo")

        from hermes_cli.profiles import get_profile_dir

        stamp_before = read_manifest(get_profile_dir("demo")).installed_at

        # Freeze `datetime.now()` to a fixed future time so we can observe that
        # update writes a NEW stamp (installs within the same second otherwise
        # collide at iso-8601 seconds resolution).
        import datetime as _dt

        class _FakeDT(_dt.datetime):
            @classmethod
            def now(cls, tz=None):
                zone = tz or _dt.timezone.utc
                return _dt.datetime(2099, 1, 1, 0, 0, 0, tzinfo=zone)

        monkeypatch.setattr(
            "hermes_cli.profile_distribution.datetime", _FakeDT, raising=True
        )

        from hermes_cli.profile_distribution import update_distribution

        update_distribution("demo")

        stamp_after = read_manifest(get_profile_dir("demo")).installed_at
        assert stamp_after != stamp_before, "installed_at should change on update"
        assert stamp_after.startswith("2099-01-01"), stamp_after
# ===========================================================================
# ProfileInfo exposes distribution metadata
# ===========================================================================
class TestProfileInfoDistribution:
    """Distribution metadata exposed on the rows returned by ``list_profiles``."""

    def test_installed_distribution_shows_in_list(self, profile_env):
        """An installed distribution's name, version and source appear in the list."""
        source_dir = _make_staging_dir(
            profile_env, "src",
            manifest=DistributionManifest(name="telem", version="1.2.3"),
        )
        install_distribution(str(source_dir), name="telem")

        from hermes_cli.profiles import list_profiles

        by_name = {info.name: info for info in list_profiles()}
        assert "telem" in by_name

        telem = by_name["telem"]
        assert telem.distribution_name == "telem"
        assert telem.distribution_version == "1.2.3"
        assert telem.distribution_source  # path populated, exact value depends on fixture

    def test_plain_profile_has_no_distribution_fields(self, profile_env):
        """Profiles created without a distribution carry no distribution metadata."""
        from hermes_cli.profiles import create_profile, list_profiles

        create_profile(name="plain", no_alias=True)

        by_name = {info.name: info for info in list_profiles()}
        plain = by_name["plain"]
        assert plain.distribution_name is None
        assert plain.distribution_version is None

    def test_malformed_manifest_does_not_break_list(self, profile_env):
        """A broken distribution.yaml must not make ``list_profiles`` raise."""
        from hermes_cli.profiles import create_profile, list_profiles, get_profile_dir

        create_profile(name="brokenmeta", no_alias=True)
        # Write a distribution.yaml that isn't a valid mapping
        broken_manifest = get_profile_dir("brokenmeta") / "distribution.yaml"
        broken_manifest.write_text(
            "not: [a, valid, mapping\n"  # broken YAML
        )

        # list_profiles must NOT raise; distribution_* stay None for this row.
        by_name = {info.name: info for info in list_profiles()}
        assert by_name["brokenmeta"].distribution_name is None
# ===========================================================================
# Error surfaces: validation failures should propagate as DistributionError
# or ValueError (both caught and rendered cleanly by the CLI handler)
# ===========================================================================
class TestErrorSurfaces:
    """Unusable manifest names must fail with a catchable exception type."""

    def test_bad_profile_name_raises_valueerror_not_traceback(self, profile_env, tmp_path):
        """A manifest 'name' that cannot serve as a profile identifier fails.

        ``ValueError`` and ``DistributionError`` are both accepted: the CLI
        handler catches either, so users see a clean 'Error: ...' line rather
        than a Python traceback.
        """
        bad_manifest = DistributionManifest(name="Invalid Name With Spaces", version="0.1.0")
        source_dir = _make_staging_dir(profile_env, "bad", manifest=bad_manifest)
        accepted_errors = (ValueError, DistributionError)
        with pytest.raises(accepted_errors):
            plan_install(str(source_dir), tmp_path / "work")

    def test_path_traversal_name_rejected(self, profile_env, tmp_path):
        """A manifest name containing ``../`` segments is refused at plan time."""
        traversal_manifest = DistributionManifest(name="../../etc/passwd", version="0.1.0")
        source_dir = _make_staging_dir(profile_env, "bad", manifest=traversal_manifest)
        accepted_errors = (ValueError, DistributionError)
        with pytest.raises(accepted_errors):
            plan_install(str(source_dir), tmp_path / "work")

View file

@ -15,6 +15,7 @@ from unittest.mock import patch, MagicMock
import pytest
from hermes_cli.profiles import (
normalize_profile_name,
validate_profile_name,
get_profile_dir,
create_profile,
@ -32,6 +33,9 @@ from hermes_cli.profiles import (
generate_zsh_completion,
_get_profiles_root,
_get_default_hermes_home,
seed_profile_skills,
has_bundled_skills_opt_out,
NO_BUNDLED_SKILLS_MARKER,
)
@ -58,6 +62,24 @@ def profile_env(tmp_path, monkeypatch):
# TestValidateProfileName
# ===================================================================
class TestNormalizeProfileName:
    """Tests for normalize_profile_name()."""

    def test_title_case_normalized(self):
        """Mixed-case and whitespace-padded names normalize to lowercase."""
        cases = (("Jules", "jules"), (" Librarian ", "librarian"))
        for raw, expected in cases:
            assert normalize_profile_name(raw) == expected

    def test_default_case_insensitive(self):
        """Any casing of 'default' normalizes to the lowercase spelling."""
        for raw in ("Default", "DEFAULT"):
            assert normalize_profile_name(raw) == "default"

    def test_empty_raises(self):
        """Empty and whitespace-only names raise a 'cannot be empty' error."""
        for blank in ("", " "):
            with pytest.raises(ValueError, match="cannot be empty"):
                normalize_profile_name(blank)
class TestValidateProfileName:
"""Tests for validate_profile_name()."""
@ -66,6 +88,11 @@ class TestValidateProfileName:
# Should not raise
validate_profile_name(name)
def test_uppercase_rejected(self):
    """Validation is strict about case: callers normalize before validating."""
    mixed_case = "Jules"
    with pytest.raises(ValueError):
        validate_profile_name(mixed_case)
@pytest.mark.parametrize("name", ["UPPER", "has space", ".hidden", "-leading"])
def test_invalid_names_rejected(self, name):
with pytest.raises(ValueError):
@ -89,6 +116,14 @@ class TestValidateProfileName:
with pytest.raises(ValueError):
validate_profile_name("")
@pytest.mark.parametrize("name", ["hermes", "test", "tmp", "root", "sudo"])
def test_reserved_names_rejected(self, name):
    """Reserved names are refused by validation with a 'reserved' message.

    These names collide with the Hermes install itself or with common
    system binaries. Rejecting them in the validator gives create, install
    and rename one shared gate.
    """
    expected_message = "reserved"
    with pytest.raises(ValueError, match=expected_message):
        validate_profile_name(name)
# ===================================================================
# TestGetProfileDir
@ -107,6 +142,10 @@ class TestGetProfileDir:
result = get_profile_dir("coder")
assert result == tmp_path / ".hermes" / "profiles" / "coder"
def test_named_profile_matching_is_case_insensitive(self, profile_env):
    """A capitalized profile name resolves to the lowercase directory."""
    expected_dir = profile_env / ".hermes" / "profiles" / "coder"
    resolved = get_profile_dir("Coder")
    assert resolved == expected_dir
# ===================================================================
# TestCreateProfile
@ -205,6 +244,64 @@ class TestCreateProfile:
assert (profile_dir / "memories" / "note.md").read_text() == "remember this"
assert not (profile_dir / "profiles").exists()
def test_clone_all_excludes_default_infrastructure(self, profile_env):
    """``clone_all`` copies profile data but leaves infrastructure behind.

    Cloning from the default profile must exclude ``hermes-agent``,
    ``.worktrees``, ``profiles``, ``bin`` and ``node_modules`` at the root,
    plus ``__pycache__``, ``*.pyc``, ``*.pyo``, ``*.sock`` and ``*.tmp`` at
    any depth. Profile data (config, env, skills, sessions, logs, state.db)
    must survive: clone-all means "complete snapshot minus infrastructure".
    """
    default_home = profile_env / ".hermes"
    skill_dir = default_home / "skills" / "my-skill"

    # Infrastructure that only the default profile carries.
    (default_home / "hermes-agent" / ".git").mkdir(parents=True)
    (default_home / "hermes-agent" / "venv" / "bin").mkdir(parents=True)
    (default_home / "hermes-agent" / "README.md").write_text("repo")
    (default_home / ".worktrees" / "some-tree").mkdir(parents=True)
    (default_home / "profiles" / "other").mkdir(parents=True)
    (default_home / "profiles" / "other" / "config.yaml").write_text("x")
    (default_home / "bin").mkdir(exist_ok=True)
    (default_home / "bin" / "tool").write_text("binary")
    # npm's hidden lockfile is a regular file inside node_modules, so model
    # it as a file rather than as a directory of that name.
    (default_home / "node_modules").mkdir(parents=True, exist_ok=True)
    (default_home / "node_modules" / ".package-lock.json").write_text("{}")

    # Bytecode and temp files, including nested ones (excluded at any depth).
    (skill_dir / "__pycache__").mkdir(parents=True)
    (skill_dir / "__pycache__" / "module.cpython-311.pyc").write_text("stale")
    (skill_dir / "module.pyc").write_text("stale")
    (skill_dir / "module.pyo").write_text("stale")
    (default_home / "data.sock").write_text("socket")
    (default_home / "data.tmp").write_text("tmp")

    # Profile data that SHOULD be copied. ``skill_dir`` already exists
    # because creating its ``__pycache__`` child created the parents.
    (skill_dir / "SKILL.md").write_text("skill")
    (default_home / "config.yaml").write_text("model: gpt-4")
    (default_home / ".env").write_text("KEY=val")
    (default_home / "state.db").write_text("sessions-data")
    (default_home / "sessions").mkdir(exist_ok=True)
    (default_home / "logs").mkdir(exist_ok=True)
    (default_home / "logs" / "gateway.log").write_text("log")

    profile_dir = create_profile("cloned", clone_all=True, no_alias=True)
    cloned_skill = profile_dir / "skills" / "my-skill"

    # Infrastructure must be excluded.
    for infra_name in ("hermes-agent", ".worktrees", "profiles", "bin", "node_modules"):
        assert not (profile_dir / infra_name).exists(), infra_name

    # Universal exclusions at any depth.
    assert not (profile_dir / "data.sock").exists()
    assert not (profile_dir / "data.tmp").exists()
    assert not (cloned_skill / "__pycache__").exists()
    assert not (cloned_skill / "module.pyc").exists()
    assert not (cloned_skill / "module.pyo").exists()

    # All profile data must be present.
    assert (cloned_skill / "SKILL.md").read_text() == "skill"
    assert (profile_dir / "config.yaml").read_text() == "model: gpt-4"
    assert (profile_dir / ".env").read_text() == "KEY=val"
    assert (profile_dir / "state.db").read_text() == "sessions-data"
    assert (profile_dir / "sessions").exists()
    assert (profile_dir / "logs" / "gateway.log").read_text() == "log"
def test_clone_config_missing_files_skipped(self, profile_env):
"""Clone config gracefully skips files that don't exist in source."""
profile_dir = create_profile("coder", clone_config=True, no_alias=True)
@ -215,6 +312,116 @@ class TestCreateProfile:
assert (profile_dir / "SOUL.md").exists()
# ===================================================================
# TestNoSkillsOptOut
# ===================================================================
class TestNoSkillsOptOut:
    """Tests for `hermes profile create --no-skills` and the opt-out marker."""

    def test_no_skills_writes_marker_and_skips_seeding(self, profile_env):
        """``no_skills=True`` drops the marker file and leaves skills/ empty."""
        profile_dir = create_profile("orchestrator", no_alias=True, no_skills=True)

        # The marker lives in the profile root and mentions the flag.
        marker_path = profile_dir / NO_BUNDLED_SKILLS_MARKER
        assert marker_path.is_file(), "expected .no-bundled-skills marker in profile root"
        assert "--no-skills" in marker_path.read_text()

        # The helper agrees with the on-disk marker.
        assert has_bundled_skills_opt_out(profile_dir) is True

        # Bootstrapping still creates skills/, but nothing is in it yet
        # because create_profile itself does not seed.
        skills_dir = profile_dir / "skills"
        assert skills_dir.is_dir()
        assert list(skills_dir.iterdir()) == []

    def test_no_skills_conflicts_with_clone(self, profile_env):
        """Combining ``no_skills`` with ``clone_config`` is rejected."""
        with pytest.raises(ValueError, match="mutually exclusive"):
            create_profile("orchestrator", no_alias=True, no_skills=True, clone_config=True)

    def test_no_skills_conflicts_with_clone_all(self, profile_env):
        """Combining ``no_skills`` with ``clone_all`` is rejected."""
        with pytest.raises(ValueError, match="mutually exclusive"):
            create_profile("orchestrator", no_alias=True, no_skills=True, clone_all=True)

    def test_seed_profile_skills_respects_marker(self, profile_env):
        """Calling seed_profile_skills() directly on an opted-out profile no-ops.

        This is the path `hermes update`'s all-profile sync loop would take.
        """
        profile_dir = create_profile("orchestrator", no_alias=True, no_skills=True)

        # Expect a result dict flagged skipped_opt_out with nothing copied.
        outcome = seed_profile_skills(profile_dir, quiet=True)
        assert outcome is not None
        assert outcome.get("skipped_opt_out") is True
        assert outcome.get("copied") == []

        # skills/ is still empty afterwards.
        assert list((profile_dir / "skills").iterdir()) == []

    def test_default_profile_gets_skills_seeded(self, profile_env, monkeypatch):
        """Without the opt-out, seed_profile_skills() goes through subprocess.

        ``subprocess.run`` is replaced so the test stays hermetic. The check
        is that the opt-out branch did not short-circuit the call.
        """
        import subprocess as _sp

        profile_dir = create_profile("coder", no_alias=True)

        # No marker means the profile is not opted out.
        assert not (profile_dir / NO_BUNDLED_SKILLS_MARKER).exists()
        assert has_bundled_skills_opt_out(profile_dir) is False

        # Stand-in for subprocess.run: records its positional args and
        # reports success with a canned JSON payload.
        recorded = []

        def fake_run(*args, **kwargs):
            recorded.append(args)
            return _sp.CompletedProcess(
                args=args, returncode=0, stdout='{"copied": ["x"]}', stderr=""
            )

        monkeypatch.setattr("subprocess.run", fake_run)

        outcome = seed_profile_skills(profile_dir, quiet=True)

        # Exactly one subprocess call, and its payload is passed through.
        assert len(recorded) == 1
        assert outcome == {"copied": ["x"]}

    def test_delete_marker_re_enables_seeding(self, profile_env, monkeypatch):
        """Deleting .no-bundled-skills opts the profile back in."""
        import subprocess as _sp

        profile_dir = create_profile("orchestrator", no_alias=True, no_skills=True)
        assert has_bundled_skills_opt_out(profile_dir) is True

        invocations = []

        def fake_run(*a, **kw):
            invocations.append(a)
            return _sp.CompletedProcess(
                args=a, returncode=0, stdout='{"copied": []}', stderr=""
            )

        monkeypatch.setattr("subprocess.run", fake_run)

        # While opted out: a skipped result and no subprocess call.
        first = seed_profile_skills(profile_dir, quiet=True)
        assert first.get("skipped_opt_out") is True
        assert invocations == []

        # Remove the marker → the next call takes the subprocess path.
        (profile_dir / NO_BUNDLED_SKILLS_MARKER).unlink()
        assert has_bundled_skills_opt_out(profile_dir) is False

        second = seed_profile_skills(profile_dir, quiet=True)
        assert second == {"copied": []}
        assert len(invocations) == 1
# ===================================================================
# TestDeleteProfile
# ===================================================================

View file

@ -0,0 +1,157 @@
"""Tests for ``_prompt_api_key`` — the shared Keep/Replace/Clear menu used by
``hermes setup`` / ``hermes model`` when an API key already exists in ``.env``.
Regression coverage for #16394: the wizard used to silently skip the key prompt
when any value was present (even malformed junk), leaving users stuck.
"""
from __future__ import annotations
from pathlib import Path
from unittest.mock import patch
import pytest
@pytest.fixture
def profile_env(tmp_path, monkeypatch):
    """Provide an isolated ``.hermes`` home containing an empty ``.env``.

    ``Path.home`` is redirected to ``tmp_path`` and ``HERMES_HOME`` points
    at the new directory, which is also the fixture's return value.
    """
    hermes_home = tmp_path / ".hermes"
    hermes_home.mkdir()
    (hermes_home / ".env").write_text("")
    monkeypatch.setattr(Path, "home", lambda: tmp_path)
    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
    return hermes_home
def _pconfig(name="deepseek"):
    """Look up the provider config registered under *name*."""
    from hermes_cli.auth import PROVIDER_REGISTRY as registry

    return registry[name]
def _run_prompt(existing_key, choice, new_key="", provider_id="", pconfig_name="deepseek"):
    """Call ``_prompt_api_key`` with canned ``input()`` / ``getpass()`` answers.

    *choice* is what ``input()`` returns and *new_key* is what
    ``getpass.getpass()`` returns. The prompt's own return value is passed
    straight back to the caller.
    """
    from hermes_cli import main as m

    provider_config = _pconfig(pconfig_name)
    input_patch = patch("builtins.input", return_value=choice)
    getpass_patch = patch("getpass.getpass", return_value=new_key)
    with input_patch, getpass_patch:
        return m._prompt_api_key(provider_config, existing_key, provider_id=provider_id)
# First-time entry ────────────────────────────────────────────────────────────
def test_first_time_save_new_key(profile_env):
    """With no existing key, a typed key is returned and saved to the env."""
    from hermes_cli.config import get_env_value

    result_key, aborted = _run_prompt(existing_key="", choice="", new_key="sk-abcdef")

    assert result_key == "sk-abcdef"
    assert aborted is False
    assert get_env_value("DEEPSEEK_API_KEY") == "sk-abcdef"
def test_first_time_cancelled(profile_env):
    """With no existing key, an empty entry yields an empty key and abort."""
    result_key, aborted = _run_prompt(existing_key="", choice="", new_key="")

    assert result_key == ""
    assert aborted is True
# Already configured — K / R / C ───────────────────────────────────────────────
def test_keep_default_empty_input(profile_env):
    """An empty menu answer keeps the existing key."""
    from hermes_cli.config import save_env_value

    save_env_value("DEEPSEEK_API_KEY", "sk-existing")

    result_key, aborted = _run_prompt(existing_key="sk-existing", choice="")

    assert result_key == "sk-existing"
    assert aborted is False
def test_keep_letter_k(profile_env):
    """Answering 'k' keeps the existing key."""
    result_key, aborted = _run_prompt(existing_key="sk-existing", choice="k")

    assert result_key == "sk-existing"
    assert aborted is False
def test_keep_on_unrecognised_input(profile_env):
    """Unrecognised menu input is treated as keep, so the key is never lost."""
    result_key, aborted = _run_prompt(existing_key="sk-existing", choice="xyz")

    assert result_key == "sk-existing"
    assert aborted is False
def test_replace_saves_new_key(profile_env):
    """Answering 'r' with a new key returns and saves the new value."""
    from hermes_cli.config import get_env_value, save_env_value

    stale_key = "sk-malformed-junk"
    save_env_value("DEEPSEEK_API_KEY", stale_key)

    result_key, aborted = _run_prompt(existing_key=stale_key, choice="r", new_key="sk-fresh")

    assert result_key == "sk-fresh"
    assert aborted is False
    assert get_env_value("DEEPSEEK_API_KEY") == "sk-fresh"
def test_replace_cancelled_preserves_key(profile_env):
    """An empty entry at the Replace prompt cancels and keeps the old key."""
    from hermes_cli.config import get_env_value, save_env_value

    current_key = "sk-existing"
    save_env_value("DEEPSEEK_API_KEY", current_key)

    result_key, aborted = _run_prompt(existing_key=current_key, choice="r", new_key="")

    assert result_key == "sk-existing"
    assert aborted is False
    assert get_env_value("DEEPSEEK_API_KEY") == "sk-existing"
def test_clear_wipes_env_and_aborts(profile_env):
    """Answering 'c' clears the provider key, aborts, and spares other vars."""
    from hermes_cli.config import get_env_value, save_env_value

    save_env_value("DEEPSEEK_API_KEY", "sk-existing")
    save_env_value("OTHER_VAR", "keep-me")

    result_key, aborted = _run_prompt(existing_key="sk-existing", choice="c")

    assert result_key == ""
    assert aborted is True
    # The provider key is gone; the unrelated entry is untouched.
    assert not get_env_value("DEEPSEEK_API_KEY")
    assert get_env_value("OTHER_VAR") == "keep-me"
def test_ctrl_c_at_choice_prompt_keeps(profile_env):
    """A KeyboardInterrupt raised by ``input()`` keeps the existing key."""
    from hermes_cli import main as m

    provider_config = _pconfig("deepseek")
    interrupting_input = patch("builtins.input", side_effect=KeyboardInterrupt)
    with interrupting_input:
        result_key, aborted = m._prompt_api_key(provider_config, "sk-existing")

    assert result_key == "sk-existing"
    assert aborted is False
# LM Studio no-auth placeholder ────────────────────────────────────────────────
def test_lmstudio_first_time_empty_uses_placeholder(profile_env):
    """First-time LM Studio setup with an empty entry saves the placeholder."""
    from hermes_cli.auth import LMSTUDIO_NOAUTH_PLACEHOLDER
    from hermes_cli.config import get_env_value

    result_key, aborted = _run_prompt(
        existing_key="",
        choice="",
        new_key="",
        provider_id="lmstudio",
        pconfig_name="lmstudio",
    )

    assert result_key == LMSTUDIO_NOAUTH_PLACEHOLDER
    assert aborted is False
    assert get_env_value("LM_API_KEY") == LMSTUDIO_NOAUTH_PLACEHOLDER
def test_lmstudio_replace_empty_does_not_overwrite_with_placeholder(profile_env):
    """Replace with empty input keeps the user's LM Studio key.

    The placeholder must not be substituted silently here. It applies only
    to first-time configuration, before the user has made an explicit choice.
    """
    from hermes_cli.config import get_env_value, save_env_value

    real_key = "my-real-lmstudio-key"
    save_env_value("LM_API_KEY", real_key)

    result_key, aborted = _run_prompt(
        existing_key=real_key,
        choice="r",
        new_key="",
        provider_id="lmstudio",
        pconfig_name="lmstudio",
    )

    assert result_key == "my-real-lmstudio-key"
    assert aborted is False
    assert get_env_value("LM_API_KEY") == "my-real-lmstudio-key"

View file

@ -72,11 +72,13 @@ def test_redact_secrets_false_in_config_yaml_is_honored(tmp_path):
assert "ENV_VAR=false" in result.stdout
def test_redact_secrets_default_false_when_unset(tmp_path):
"""Without the config key, redaction stays OFF by default.
def test_redact_secrets_default_true_when_unset(tmp_path):
"""Without the config key or env var, redaction is ON by default (#17691).
Secret redaction is opt-in users who want it must set
`security.redact_secrets: true` explicitly (or HERMES_REDACT_SECRETS=true).
Secret redaction is a secure default users who need raw credential
values in tool output (e.g. working on the redactor itself) must set
`security.redact_secrets: false` explicitly (or
`HERMES_REDACT_SECRETS=false`).
"""
hermes_home = tmp_path / ".hermes"
hermes_home.mkdir()
@ -107,7 +109,7 @@ def test_redact_secrets_default_false_when_unset(tmp_path):
timeout=30,
)
assert result.returncode == 0, f"probe failed: {result.stderr}"
assert "REDACT_ENABLED=False" in result.stdout
assert "REDACT_ENABLED=True" in result.stdout
def test_redact_secrets_true_in_config_yaml_is_honored(tmp_path):

View file

@ -152,4 +152,135 @@ class TestRelaunch:
with pytest.raises(SystemExit):
relaunch_mod.relaunch(["--resume", "abc"])
assert calls == [("/usr/bin/hermes", ["/usr/bin/hermes", "--resume", "abc"])]
assert calls == [("/usr/bin/hermes", ["/usr/bin/hermes", "--resume", "abc"])]
def test_windows_uses_subprocess_not_execvp(self, monkeypatch):
    """On win32, relaunch() must go through subprocess.run, not os.execvp.

    os.execvp raises OSError "Exec format error" on Windows when the target
    is a .cmd shim or console-script wrapper (both common for hermes), so
    relaunch() has to detect win32 and use subprocess.run + sys.exit.
    """
    import subprocess as _subprocess

    hermes_exe = r"C:\Users\test\hermes.exe"
    run_argvs = []
    execvp_calls = []

    def fake_subprocess_run(argv, **kwargs):
        run_argvs.append(list(argv))

        class _Result:
            returncode = 0

        return _Result()

    def fake_execvp(*args, **kwargs):
        # Reaching this stub means the Windows route was not taken.
        execvp_calls.append(args)
        raise AssertionError("os.execvp must not be called on Windows")

    monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
    monkeypatch.setattr(relaunch_mod, "resolve_hermes_bin", lambda: hermes_exe)
    monkeypatch.setattr(_subprocess, "run", fake_subprocess_run)
    monkeypatch.setattr(relaunch_mod.os, "execvp", fake_execvp)

    with pytest.raises(SystemExit) as exc_info:
        relaunch_mod.relaunch(["chat"])

    assert exc_info.value.code == 0
    assert execvp_calls == []
    assert run_argvs == [[hermes_exe, "chat"]]
def test_windows_propagates_child_exit_code(self, monkeypatch):
    """The child's non-zero return code becomes the SystemExit code."""
    import subprocess as _subprocess

    class _Result:
        returncode = 42

    def fake_run(argv, **kwargs):
        return _Result()

    monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
    monkeypatch.setattr(relaunch_mod, "resolve_hermes_bin", lambda: r"C:\hermes.exe")
    monkeypatch.setattr(_subprocess, "run", fake_run)
    monkeypatch.setattr(relaunch_mod.os, "execvp", lambda *a, **kw: None)

    with pytest.raises(SystemExit) as exc_info:
        relaunch_mod.relaunch(["chat"])

    exit_code = exc_info.value.code
    assert exit_code == 42
def test_windows_surfaces_oserror_with_help(self, monkeypatch, capsys):
    """An OSError from subprocess becomes a readable hint plus exit code 1.

    File-not-found / bad-format failures must not bubble up as a cryptic
    traceback: relaunch() is expected to write a hint to stderr and exit 1.
    """
    import subprocess as _subprocess

    def fake_run(argv, **kwargs):
        raise OSError(2, "No such file or directory")

    monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
    monkeypatch.setattr(relaunch_mod, "resolve_hermes_bin", lambda: r"C:\missing.exe")
    monkeypatch.setattr(_subprocess, "run", fake_run)
    monkeypatch.setattr(relaunch_mod.os, "execvp", lambda *a, **kw: None)

    with pytest.raises(SystemExit) as exc_info:
        relaunch_mod.relaunch(["chat"])

    assert exc_info.value.code == 1

    stderr_text = capsys.readouterr().err
    assert "relaunch failed" in stderr_text
    lowered = stderr_text.lower()
    assert "open a new terminal" in lowered or "path" in lowered
class TestResolveHermesBinWindowsPyGuard:
    """On Windows, resolve_hermes_bin MUST NOT return a .py path.

    os.access(x, os.X_OK) returns True for .py files on Windows because
    PATHEXT includes .py when the Python launcher is installed. But
    subprocess.run can't actually exec a .py directly, so the relaunch
    would fail with the cryptic "%1 is not a valid Win32 application" error.
    """

    def test_windows_rejects_py_argv0_falls_through_to_path(self, monkeypatch, tmp_path):
        """A .py ``sys.argv[0]`` on win32 is skipped in favour of the PATH hit."""
        # A real file on disk, so it would "pass" the isfile + X_OK checks.
        py_script = tmp_path / "main.py"
        py_script.write_text("# stub")

        def fake_which(name):
            # Only "hermes" resolves, so the None fallback is not exercised
            # here (it has its own test).
            if name == "hermes":
                return r"C:\venv\Scripts\hermes.exe"
            return None

        monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
        monkeypatch.setattr(relaunch_mod.sys, "argv", [str(py_script), "chat"])
        monkeypatch.setattr(relaunch_mod.shutil, "which", fake_which)

        resolved = relaunch_mod.resolve_hermes_bin()

        # The PATH entry must win over the .py script.
        assert resolved == r"C:\venv\Scripts\hermes.exe"

    def test_posix_still_accepts_py_argv0(self, monkeypatch, tmp_path):
        """POSIX behaviour is unchanged: an executable script argv[0] is returned.

        POSIX exec can route through the shebang line, so an executable
        script (including .py with a shebang + chmod +x) is fine to return.
        """
        if sys.platform == "win32":
            pytest.skip("POSIX semantics")

        launcher = tmp_path / "hermes"
        launcher.write_text("#!/usr/bin/env python3\n")
        launcher.chmod(0o755)
        monkeypatch.setattr(relaunch_mod.sys, "argv", [str(launcher), "chat"])

        resolved = relaunch_mod.resolve_hermes_bin()
        assert resolved == str(launcher)

    def test_windows_py_argv0_with_no_hermes_on_path_returns_none(self, monkeypatch, tmp_path):
        """A .py argv[0] on win32 with nothing on PATH resolves to None.

        Returning None lets the caller fall back to
        ``python -m hermes_cli.main``.
        """
        py_script = tmp_path / "main.py"
        py_script.write_text("# stub")

        monkeypatch.setattr(relaunch_mod.sys, "platform", "win32")
        monkeypatch.setattr(relaunch_mod.sys, "argv", [str(py_script), "chat"])
        monkeypatch.setattr(relaunch_mod.shutil, "which", lambda name: None)

        resolved = relaunch_mod.resolve_hermes_bin()
        assert resolved is None

View file

@ -897,6 +897,58 @@ def test_named_custom_provider_does_not_shadow_builtin_provider(monkeypatch):
assert resolved["requested_provider"] == "nous"
def test_named_custom_provider_wins_over_builtin_alias(monkeypatch):
    """A custom provider named after a built-in *alias* beats the built-in.

    Regression guard for #15743: with ``custom_providers: [{name: kimi, ...}]``
    and ``provider: kimi``, the built-in alias rewriting of ``kimi`` to
    ``kimi-coding`` would otherwise hijack the request and send it to the
    wrong endpoint.
    """

    def fake_load_config():
        return {
            "custom_providers": [
                {
                    "name": "kimi",
                    "base_url": "https://my-custom-kimi.example.com/v1",
                    "api_key": "my-kimi-key",
                }
            ]
        }

    monkeypatch.setattr(rp, "load_config", fake_load_config)

    provider_entry = rp._get_named_custom_provider("kimi")

    assert provider_entry is not None
    assert provider_entry["base_url"] == "https://my-custom-kimi.example.com/v1"
    assert provider_entry["api_key"] == "my-kimi-key"
def test_named_custom_provider_skipped_for_canonical_built_in(monkeypatch):
    """A custom entry named after a canonical provider is not returned.

    Counterpart to the alias case: ``nous`` is a canonical provider name
    (``resolve_provider('nous') == 'nous'``), so the built-in still wins and
    the lookup yields ``None``.
    """

    def fake_load_config():
        return {
            "custom_providers": [
                {
                    "name": "nous",
                    "base_url": "http://localhost:1234/v1",
                    "api_key": "shadow-key",
                }
            ]
        }

    monkeypatch.setattr(rp, "load_config", fake_load_config)

    provider_entry = rp._get_named_custom_provider("nous")

    assert provider_entry is None
def test_explicit_openrouter_skips_openai_base_url(monkeypatch):
"""When the user explicitly requests openrouter, OPENAI_BASE_URL
(which may point to a custom endpoint) must not override the

View file

@ -0,0 +1,202 @@
"""Tests for session handoff (CLI to gateway platform).
The handoff state machine lives on the ``sessions`` table:
    None → "pending" → "running" → ("completed" | "failed")
CLI side calls ``request_handoff`` and poll-waits on ``get_handoff_state``.
Gateway side iterates ``list_pending_handoffs``, calls ``claim_handoff`` to
flip pending → running, and finishes with ``complete_handoff`` or
``fail_handoff``.
"""
from __future__ import annotations
import time
import pytest
from hermes_state import SessionDB
class TestHandoffStateDB:
    """Test the handoff schema + helper methods on SessionDB."""

    @pytest.fixture
    def db(self, tmp_path, monkeypatch):
        """Return a SessionDB backed by ``state.db`` under a temp HERMES_HOME."""
        hermes_home = tmp_path / ".hermes"
        hermes_home.mkdir()
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))
        return SessionDB(db_path=hermes_home / "state.db")

    def _make_session(self, db, session_id, source="cli", title=None):
        """Insert a session row directly for testing."""
        row = (session_id, source, title, time.time())

        def _insert(conn):
            conn.execute(
                "INSERT OR IGNORE INTO sessions (id, source, title, started_at) "
                "VALUES (?, ?, ?, ?)",
                row,
            )

        db._execute_write(_insert)

    def test_columns_exist(self, db):
        """Selecting the three handoff columns must not raise."""
        db._conn.execute(
            "SELECT handoff_state, handoff_platform, handoff_error "
            "FROM sessions LIMIT 0"
        )

    def test_request_handoff_marks_pending(self, db):
        """A first request is accepted and recorded as pending, with no error."""
        session_id = "sess-1"
        self._make_session(db, session_id)

        assert db.request_handoff(session_id, "telegram") is True

        expected = {"state": "pending", "platform": "telegram", "error": None}
        assert db.get_handoff_state(session_id) == expected

    def test_request_handoff_rejects_in_flight(self, db):
        """A new request is refused while one is pending or running."""
        session_id = "sess-2"
        self._make_session(db, session_id)
        assert db.request_handoff(session_id, "telegram") is True

        # Still pending → the re-request is rejected.
        assert db.request_handoff(session_id, "discord") is False

        # Claimed by the gateway (running) → still rejected.
        assert db.claim_handoff(session_id) is True
        assert db.request_handoff(session_id, "discord") is False

    def test_request_handoff_after_terminal_state_resets_error(self, db):
        """After a failure, a retry is accepted and clears the stored error."""
        session_id = "sess-3"
        self._make_session(db, session_id)
        db.request_handoff(session_id, "telegram")
        db.claim_handoff(session_id)
        db.fail_handoff(session_id, "earlier failure")

        # The retry targets a different platform and must be accepted.
        assert db.request_handoff(session_id, "discord") is True

        current = db.get_handoff_state(session_id)
        assert current["state"] == "pending"
        assert current["platform"] == "discord"
        assert current["error"] is None

    def test_list_pending_handoffs_excludes_running_and_terminal(self, db):
        """Only sessions still in the pending state are listed."""
        a, b, c, d = "sess-a", "sess-b", "sess-c", "sess-d"
        for session_id in (a, b, c, d):
            self._make_session(db, session_id)

        db.request_handoff(a, "telegram")
        db.request_handoff(b, "discord")

        # c is claimed, so it is running rather than pending.
        db.request_handoff(c, "telegram")
        db.claim_handoff(c)

        # d goes all the way to a terminal state.
        db.request_handoff(d, "slack")
        db.claim_handoff(d)
        db.complete_handoff(d)

        pending_ids = {row["id"] for row in db.list_pending_handoffs()}
        assert pending_ids == {a, b}

    def test_claim_handoff_is_atomic(self, db):
        """Only the first claim succeeds; the state then stays running."""
        session_id = "sess-claim"
        self._make_session(db, session_id)
        db.request_handoff(session_id, "telegram")

        first_claim = db.claim_handoff(session_id)
        assert first_claim is True

        # The state is now "running", not "pending", so this is a no-op.
        second_claim = db.claim_handoff(session_id)
        assert second_claim is False
        assert db.get_handoff_state(session_id)["state"] == "running"

    def test_complete_handoff_clears_error(self, db):
        """Completing a retried handoff leaves no error behind."""
        session_id = "sess-complete"
        self._make_session(db, session_id)

        # First attempt fails.
        db.request_handoff(session_id, "telegram")
        db.claim_handoff(session_id)
        db.fail_handoff(session_id, "transient")

        # The retry runs through to completion (mocking the watcher path).
        db.request_handoff(session_id, "telegram")
        db.claim_handoff(session_id)
        db.complete_handoff(session_id)

        current = db.get_handoff_state(session_id)
        assert current["state"] == "completed"
        assert current["error"] is None

    def test_fail_handoff_records_reason(self, db):
        """The failure reason is stored alongside the failed state."""
        session_id = "sess-fail"
        reason = "no home channel for telegram"
        self._make_session(db, session_id)
        db.request_handoff(session_id, "telegram")
        db.claim_handoff(session_id)

        db.fail_handoff(session_id, reason)

        current = db.get_handoff_state(session_id)
        assert current["state"] == "failed"
        assert current["error"] == "no home channel for telegram"

    def test_fail_handoff_truncates_long_reasons(self, db):
        """A 1000-character reason is stored as at most 500 characters."""
        session_id = "sess-fail-long"
        self._make_session(db, session_id)
        db.request_handoff(session_id, "telegram")
        db.claim_handoff(session_id)

        oversized_reason = "x" * 1000
        db.fail_handoff(session_id, oversized_reason)

        stored_error = db.get_handoff_state(session_id)["error"]
        assert len(stored_error) <= 500

    def test_get_handoff_state_for_unknown_session(self, db):
        """An unknown session id yields ``None``."""
        assert db.get_handoff_state("does-not-exist") is None

    def test_full_pending_to_completed_flow(self, db):
        """End-to-end sequence the CLI + gateway watcher follow."""
        session_id = "sess-flow"
        self._make_session(db, session_id, title="my session")
        db.append_message(session_id, "user", "Hello")
        db.append_message(session_id, "assistant", "Hi there!")

        # CLI side: request the handoff.
        assert db.request_handoff(session_id, "telegram") is True
        assert db.get_handoff_state(session_id)["state"] == "pending"

        # Gateway watcher: discover the pending row, then claim it.
        queue = db.list_pending_handoffs()
        assert len(queue) == 1
        assert queue[0]["id"] == session_id
        assert db.claim_handoff(session_id) is True
        assert db.get_handoff_state(session_id)["state"] == "running"

        # The gateway loads the transcript with get_messages (the real flow
        # uses session_store.switch_session, which reads the same table).
        transcript = db.get_messages(session_id)
        roles = [message["role"] for message in transcript]
        assert roles == ["user", "assistant"]

        # Gateway: mark the handoff completed.
        db.complete_handoff(session_id)
        assert db.get_handoff_state(session_id)["state"] == "completed"
        assert db.list_pending_handoffs() == []
class TestHandoffCommandRegistration:
    """Slash-command surface checks."""

    def test_command_registered(self):
        """``handoff`` resolves to a command in the Session category."""
        from hermes_cli.commands import resolve_command

        command = resolve_command("handoff")

        assert command is not None
        assert command.name == "handoff"
        assert command.category == "Session"

    def test_command_is_cli_only(self):
        """`/handoff` is initiated from the CLI; gateway shouldn't expose it."""
        from hermes_cli.commands import resolve_command, GATEWAY_KNOWN_COMMANDS

        command = resolve_command("handoff")

        assert command is not None
        assert command.cli_only is True
        assert "handoff" not in GATEWAY_KNOWN_COMMANDS

View file

@ -613,3 +613,35 @@ def test_offer_launch_chat_falls_back_to_module(monkeypatch):
setup_mod._offer_launch_chat()
assert exec_calls == [(sys.executable, [sys.executable, "-m", "hermes_cli.main", "chat"])]
def test_setup_slack_saves_home_channel(monkeypatch):
    """_setup_slack() persists SLACK_HOME_CHANNEL when the user supplies one."""
    persisted = {}
    # Answers are handed out in the order prompt() is called; the last one is
    # the value this test expects to be saved as the home channel.
    answers = iter(["xoxb-test-token", "xapp-test-token", "", "C01ABC2DE3F"])

    def record_env(k, v):
        persisted[k] = v

    stubs = {
        "get_env_value": lambda key: "",
        "save_env_value": record_env,
        "prompt": lambda *_a, **_kw: next(answers),
        "prompt_yes_no": lambda *_a, **_kw: False,
        "_write_slack_manifest_and_instruct": lambda: None,
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(setup_mod, attr_name, stub)

    setup_mod._setup_slack()

    assert persisted.get("SLACK_HOME_CHANNEL") == "C01ABC2DE3F"
def test_setup_slack_home_channel_empty_not_saved(monkeypatch):
    """_setup_slack() leaves SLACK_HOME_CHANNEL unsaved when the answer is blank."""
    persisted = {}
    # Same prompt sequence as the "saves" case, but the final answer is empty.
    answers = iter(["xoxb-test-token", "xapp-test-token", "", ""])

    def record_env(k, v):
        persisted[k] = v

    stubs = {
        "get_env_value": lambda key: "",
        "save_env_value": record_env,
        "prompt": lambda *_a, **_kw: next(answers),
        "prompt_yes_no": lambda *_a, **_kw: False,
        "_write_slack_manifest_and_instruct": lambda: None,
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(setup_mod, attr_name, stub)

    setup_mod._setup_slack()

    assert "SLACK_HOME_CHANNEL" not in persisted

View file

@ -4,11 +4,16 @@ from hermes_cli.setup import setup_agent_settings
def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monkeypatch, capsys):
"""The helper text should match the value shown in the prompt."""
"""The helper text should match the value shown in the prompt.
After PR#18413 max_turns is read exclusively from config.yaml — the
.env `HERMES_MAX_ITERATIONS` fallback was removed because it was
shadowing the user's current config (see the 60-vs-500 incident).
"""
monkeypatch.setenv("HERMES_HOME", str(tmp_path))
config = {
"agent": {"max_turns": 90},
"agent": {"max_turns": 60},
"display": {"tool_progress": "all"},
"compression": {"threshold": 0.50},
"session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
@ -16,10 +21,10 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk
prompt_answers = iter(["60", "all", "0.5"])
monkeypatch.setattr("hermes_cli.setup.get_env_value", lambda key: "60" if key == "HERMES_MAX_ITERATIONS" else "")
monkeypatch.setattr("hermes_cli.setup.prompt", lambda *args, **kwargs: next(prompt_answers))
monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
monkeypatch.setattr("hermes_cli.setup.save_env_value", lambda *args, **kwargs: None)
monkeypatch.setattr("hermes_cli.setup.remove_env_value", lambda *args, **kwargs: None)
monkeypatch.setattr("hermes_cli.setup.save_config", lambda *args, **kwargs: None)
setup_agent_settings(config)
@ -27,3 +32,47 @@ def test_setup_agent_settings_uses_displayed_max_iterations_value(tmp_path, monk
out = capsys.readouterr().out
assert "Press Enter to keep 60." in out
assert "Default is 90" not in out
def test_setup_agent_settings_prefers_config_over_stale_env(tmp_path, monkeypatch, capsys):
    """config.yaml wins even when a stale .env value disagrees.

    Regression guard for the bug where `.env HERMES_MAX_ITERATIONS=60`
    from an old `hermes setup` run shadowed `agent.max_turns: 500` in
    config.yaml. The wizard must display the config value and remove the
    stale .env key.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    config = {
        "agent": {"max_turns": 500},  # the value the user set in config.yaml
        "display": {"tool_progress": "all"},
        "compression": {"threshold": 0.50},
        "session_reset": {"mode": "both", "idle_minutes": 1440, "at_hour": 4},
    }
    answers = iter(["500", "all", "0.5"])
    removed_keys: list[str] = []

    def stale_env_lookup(key):
        # Simulates the leftover .env entry that the wizard must ignore.
        return "60" if key == "HERMES_MAX_ITERATIONS" else ""

    def next_answer(*args, **kwargs):
        return next(answers)

    def track_removal(key):
        removed_keys.append(key)
        return True

    def do_nothing(*args, **kwargs):
        return None

    monkeypatch.setattr("hermes_cli.setup.get_env_value", stale_env_lookup)
    monkeypatch.setattr("hermes_cli.setup.prompt", next_answer)
    monkeypatch.setattr("hermes_cli.setup.prompt_choice", lambda *args, **kwargs: 4)
    monkeypatch.setattr("hermes_cli.setup.save_env_value", do_nothing)
    monkeypatch.setattr("hermes_cli.setup.remove_env_value", track_removal)
    monkeypatch.setattr("hermes_cli.setup.save_config", do_nothing)

    setup_agent_settings(config)

    printed = capsys.readouterr().out
    # The config value is what gets displayed ...
    assert "Press Enter to keep 500." in printed
    assert "Press Enter to keep 60." not in printed
    # ... and the stale .env entry is cleaned up.
    assert "HERMES_MAX_ITERATIONS" in removed_keys

View file

@ -1,6 +1,28 @@
from hermes_cli import setup as setup_mod
def test_prompt_strips_bracketed_paste_markers(monkeypatch):
    """prompt() returns the typed value without the ESC[200~ / ESC[201~ wrappers."""
    pasted = "\x1b[200~sk-ant-api-key\x1b[201~"

    def fake_input(_prompt=""):
        return pasted

    monkeypatch.setattr("builtins.input", fake_input)

    assert setup_mod.prompt("API key") == "sk-ant-api-key"
def test_password_prompt_strips_bracketed_paste_markers(monkeypatch):
    """prompt(password=True) also drops the ESC[200~ / ESC[201~ wrappers."""
    pasted = "\x1b[200~secret-token\x1b[201~"

    def fake_getpass(_prompt=""):
        return pasted

    monkeypatch.setattr("getpass.getpass", fake_getpass)

    assert setup_mod.prompt("API key", password=True) == "secret-token"
def test_prompt_choice_uses_curses_helper(monkeypatch):
monkeypatch.setattr(setup_mod, "_curses_prompt_choice", lambda question, choices, default=0, description=None: 1)

View file

@ -0,0 +1,30 @@
"""Tests for Slack CLI helpers."""
from hermes_cli.slack_cli import _build_full_manifest
class TestSlackFullManifest:
    """Shape checks for the full Slack app manifest used by `hermes slack manifest`."""

    def test_app_home_messages_are_writable(self):
        """The app_home section enables a writable messages tab and no home tab."""
        manifest = _build_full_manifest("Hermes", "Your Hermes agent on Slack")
        expected_app_home = {
            "home_tab_enabled": False,
            "messages_tab_enabled": True,
            "messages_tab_read_only_enabled": False,
        }
        assert manifest["features"]["app_home"] == expected_app_home

    def test_private_channel_directory_scope_is_included(self):
        """The bot scope list contains ``groups:read``."""
        manifest = _build_full_manifest("Hermes", "Your Hermes agent on Slack")
        assert "groups:read" in manifest["oauth_config"]["scopes"]["bot"]

    def test_assistant_features_remain_enabled(self):
        """Assistant view, scope, and thread-started event are all still present."""
        manifest = _build_full_manifest("Hermes", "Your Hermes agent on Slack")
        assert "assistant_view" in manifest["features"]
        assert "assistant:write" in manifest["oauth_config"]["scopes"]["bot"]
        subscribed = manifest["settings"]["event_subscriptions"]["bot_events"]
        assert "assistant_thread_started" in subscribed

View file

@ -88,6 +88,51 @@ def test_auth_spotify_status_command_reports_logged_in(capsys, monkeypatch: pyte
assert "client_id: spotify-client" in output
def test_spotify_logout_does_not_reset_model_provider(
    tmp_path,
    monkeypatch: pytest.MonkeyPatch,
    capsys,
) -> None:
    """Logging out of Spotify clears its auth state but leaves config.yaml untouched."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    # The exact text written here is compared against the file afterwards.
    model_yaml = (
        "model:\n"
        "  default: gemini-3-flash\n"
        "  provider: custom:local\n"
        "  base_url: http://localhost:11434/v1\n"
        "  api_key: ${LOCAL_API_KEY}\n"
    )
    config_path = tmp_path / "config.yaml"
    config_path.write_text(model_yaml, encoding="utf-8")

    spotify_state = {
        "client_id": "spotify-client",
        "access_token": "access-token",
        "refresh_token": "refresh-token",
        "expires_at": "2099-01-01T00:00:00+00:00",
    }
    # Seed the auth store with a Spotify entry that is not the active provider.
    with auth_mod._auth_store_lock():
        store = auth_mod._load_auth_store()
        auth_mod._store_provider_state(store, "spotify", spotify_state, set_active=False)
        auth_mod._save_auth_store(store)

    auth_mod.logout_command(SimpleNamespace(provider="spotify"))

    printed = capsys.readouterr().out
    assert "Logged out of Spotify." in printed
    assert "Model provider configuration was unchanged." in printed
    assert auth_mod.get_provider_auth_state("spotify") is None
    assert config_path.read_text(encoding="utf-8") == model_yaml
def test_spotify_interactive_setup_persists_client_id(
tmp_path,

View file

@ -0,0 +1,180 @@
"""Guards for CLI startup performance regression.
``hermes_cli.main`` skips eager plugin discovery at argparse-setup time
when the invocation is clearly targeting a known built-in subcommand.
This saves 500-650ms on ``hermes --help``, ``hermes version``,
``hermes logs``, etc., by not importing ``google.cloud.pubsub_v1``,
``aiohttp``, ``grpc``, and friends.
Two invariants:
1. ``_BUILTIN_SUBCOMMANDS`` must contain every subcommand that is actually
registered by ``main()``. If an entry is missing, plugin discovery
runs unnecessarily for that command (correctness-safe, just slow).
If an entry is PRESENT but the subcommand doesn't exist, a plugin
could shadow the name — also bad.
2. ``_plugin_cli_discovery_needed()`` returns the right answer for the
flag/positional parsing cases it's meant to handle.
"""
from __future__ import annotations
import io
import re
import sys
from contextlib import redirect_stdout
from unittest.mock import patch
import pytest
from hermes_cli.main import (
_BUILTIN_SUBCOMMANDS,
_first_positional_argv,
_plugin_cli_discovery_needed,
)
# ── helper: grab the live set of top-level subcommands from argparse ───────
def _live_subcommand_names() -> set[str]:
    """Return the top-level subcommand names printed by an in-process ``hermes --help``.

    ``_plugin_cli_discovery_needed`` is patched to return False while
    ``main()`` runs, so plugin-registered commands are excluded and only
    the built-in set is validated. ``sys.argv`` is restored afterwards.
    """
    from hermes_cli import main as _main

    captured = io.StringIO()
    saved_argv = sys.argv[:]
    sys.argv = ["hermes", "--help"]
    try:
        # --help makes argparse print to stdout and exit via SystemExit.
        with patch.object(_main, "_plugin_cli_discovery_needed", return_value=False), \
                redirect_stdout(captured), \
                pytest.raises(SystemExit):
            _main.main()
    finally:
        sys.argv = saved_argv

    help_text = captured.getvalue()
    # argparse prints "{chat,model,...}" somewhere in the help output;
    # the first such brace group is taken as the subcommand list.
    match = re.search(r"\{([a-zA-Z0-9_,\-]+)\}", help_text)
    assert match, f"Could not find subcommand group in --help output:\n{help_text[:500]}"
    names = match.group(1).split(",")
    return set(names)
# ── _first_positional_argv ─────────────────────────────────────────────────
@pytest.mark.parametrize(
    "argv,expected",
    [
        # No positional at all: bare invocation or flags only.
        (["hermes"], None),
        (["hermes", "--help"], None),
        (["hermes", "-h"], None),
        (["hermes", "--version"], None),
        (["hermes", "-w"], None),
        # -p / --profile is stripped from sys.argv by
        # _apply_profile_override() at import time, so it never reaches
        # _first_positional_argv. Only -w / --tui are exercised here.
        (["hermes", "-w", "--tui"], None),
        # First positional is a subcommand, with or without leading flags.
        (["hermes", "version"], "version"),
        (["hermes", "--tui", "chat"], "chat"),
        (["hermes", "-w", "logs"], "logs"),
        (["hermes", "chat", "hello world"], "chat"),
        (["hermes", "gateway", "run"], "gateway"),
        # Top-level flags that take a value: the value must be skipped.
        (["hermes", "-m", "gpt5", "chat"], "chat"),
        (["hermes", "--model", "gpt5", "chat", "hi"], "chat"),
        (["hermes", "-m", "gpt5", "--provider", "openai", "chat"], "chat"),
        (["hermes", "-z", "hello world"], None),
        (["hermes", "-z", "hello", "chat"], "chat"),
        (["hermes", "--model=gpt5", "chat"], "chat"),  # inline --flag=value form
        (["hermes", "--", "chat"], "chat"),  # after the -- terminator
        (["hermes", "-w", "--"], None),
        # An unknown positional after skipped flags is a plugin-command candidate.
        (["hermes", "some-plugin-cmd"], "some-plugin-cmd"),
        (["hermes", "-m", "gpt5", "some-plugin-cmd"], "some-plugin-cmd"),
    ],
)
def test_first_positional_argv(argv, expected):
    """_first_positional_argv() reports the expected first positional for each argv."""
    with patch.object(sys, "argv", argv):
        found = _first_positional_argv()
        assert found == expected
# ── _plugin_cli_discovery_needed ───────────────────────────────────────────
@pytest.mark.parametrize(
    "argv",
    [
        ["hermes"],  # bare invocation -> chat
        ["hermes", "--help"],  # top-level help
        ["hermes", "-h"],
        ["hermes", "version"],  # known built-in
        ["hermes", "logs"],
        ["hermes", "gateway", "run"],
        ["hermes", "--tui"],
        ["hermes", "-w", "--tui"],
        ["hermes", "chat", "hi"],
        ["hermes", "help"],  # accepted built-in-ish
        ["hermes", "-m", "gpt5", "chat"],  # value of -m is skipped
    ],
)
def test_discovery_skipped_for_builtins(argv):
    """Plugin discovery is reported as unnecessary for built-in invocations."""
    with patch.object(sys, "argv", argv):
        needed = _plugin_cli_discovery_needed()
        assert needed is False
@pytest.mark.parametrize(
    "argv",
    [
        ["hermes", "meet", "join"],  # potential google_meet plugin
        ["hermes", "honcho", "status"],  # potential memory plugin
        ["hermes", "unknown-subcmd"],
    ],
)
def test_discovery_runs_for_unknown_positional(argv):
    """Plugin discovery is reported as needed when the first positional is not built in."""
    with patch.object(sys, "argv", argv):
        needed = _plugin_cli_discovery_needed()
        assert needed is True
# ── _BUILTIN_SUBCOMMANDS ↔ argparse registration parity ────────────────────
def test_builtin_set_covers_every_registered_subcommand():
    """Every subcommand registered in main() must appear in the set.

    Missing entries cause a slow-path regression (correctness stays
    fine — discovery just runs unnecessarily).
    """
    # "help" is synthetic — an argparse-implicit convenience kept in the
    # set so ``hermes help <cmd>`` skips discovery; it does not show up
    # as a subparser in the --help output, so drop it before comparing.
    declared = _BUILTIN_SUBCOMMANDS - {"help"}
    registered = _live_subcommand_names()

    missing_from_declaration = registered - declared
    failure_message = (
        f"_BUILTIN_SUBCOMMANDS is missing these live subcommands: "
        f"{sorted(missing_from_declaration)}. Add them to "
        f"hermes_cli/main.py::_BUILTIN_SUBCOMMANDS so plugin discovery "
        f"can be skipped when the user targets them."
    )
    assert not missing_from_declaration, failure_message
def test_builtin_set_has_no_phantom_entries():
    """No entry in the set may name a subcommand that no longer exists.

    A phantom entry means plugin discovery gets incorrectly skipped for
    a name that — if a plugin actually registered it — would fail to
    parse. Keeps the set honest.
    """
    registered = _live_subcommand_names()
    allowed_synthetic = {"help"}

    # Anything declared that is neither registered nor explicitly synthetic.
    phantom = _BUILTIN_SUBCOMMANDS - (registered | allowed_synthetic)
    failure_message = (
        f"_BUILTIN_SUBCOMMANDS has entries that are not registered as "
        f"top-level subparsers: {sorted(phantom)}"
    )
    assert not phantom, failure_message

View file

@ -113,3 +113,123 @@ class TestOuterExceptEIO:
assert not (getattr(exc, "errno", None) == errno.EIO)
assert "is not registered" not in str(exc)
assert "Bad file descriptor" not in str(exc)
# ---------------------------------------------------------------------------
# Signal handler guarded logger.debug (#13710 regression)
# ---------------------------------------------------------------------------
#
# CPython's logging module is not reentrant-safe. ``Logger.isEnabledFor``
# caches level results in ``Logger._cache``; under shutdown races the cache
# can be cleared (``Logger._clear_cache``) or mid-mutation when the signal
# fires, raising ``KeyError: <level_int>`` (e.g. ``KeyError: 10`` for DEBUG)
# from inside the handler. If that KeyError escapes, it bypasses the
# ``raise KeyboardInterrupt()`` on the next line, which in turn bypasses
# prompt_toolkit's normal interrupt unwind and surfaces as the EIO cascade
# from #13710.
#
# The fix: wrap the ``logger.debug`` call in the signal handler in a bare
# ``try/except Exception: pass`` so logging can never raise through it.
#
# These tests verify the contract: the handler must raise KeyboardInterrupt
# (and nothing else) regardless of whether logger.debug succeeds or blows up.
def _make_signal_handler(logger, agent_state):
"""Build a standalone copy of ``_signal_handler``.
The real handler is defined as a closure inside ``CLI._run_interactive``;
we reconstruct an equivalent here so the unit tests don't need a full
CLI instance. Mirrors cli.py:_signal_handler as of #13710 regression
fix guarded logger.debug + agent interrupt + KeyboardInterrupt.
"""
def _signal_handler(signum, frame):
# Guarded: logging must never raise through a signal handler.
try:
logger.debug("Received signal %s, triggering graceful shutdown", signum)
except Exception:
pass # never let logging raise from a signal handler (#13710 regression)
try:
if agent_state.get("agent") and agent_state.get("running"):
agent_state["agent"].interrupt(f"received signal {signum}")
except Exception:
pass # never block signal handling
raise KeyboardInterrupt()
return _signal_handler
class TestSignalHandlerLoggingRace:
    """#13710 regression — a failing logger.debug must not escape the handler.

    If the DEBUG-level ``logging._cache`` lookup races with a concurrent
    ``_clear_cache`` (e.g. another thread reconfiguring logging during
    shutdown), ``logger.debug`` can raise ``KeyError: 10``. The signal
    handler must swallow that and still raise KeyboardInterrupt.
    """

    def test_keyboard_interrupt_raised_on_normal_path(self):
        """Baseline: with a working logger the handler logs once and raises KeyboardInterrupt."""
        log = MagicMock()
        on_signal = _make_signal_handler(log, {})

        with pytest.raises(KeyboardInterrupt):
            on_signal(15, None)  # SIGTERM

        log.debug.assert_called_once()

    def test_keyboard_interrupt_raised_when_logger_raises_keyerror(self):
        """KeyError(10) from logger.debug must not escape — KeyboardInterrupt wins.

        This is the failure signature from the #13710 regression: the
        CPython 3.11 ``Logger._cache[level]`` race surfaces as a KeyError
        on the integer level value, which previously propagated out of the
        handler before ``raise KeyboardInterrupt()`` could run.
        """
        log = MagicMock()
        log.debug.side_effect = KeyError(10)  # 10 is the DEBUG level int
        on_signal = _make_signal_handler(log, {})

        # The outcome must be KeyboardInterrupt, not KeyError.
        with pytest.raises(KeyboardInterrupt):
            on_signal(15, None)

    def test_keyboard_interrupt_raised_when_logger_raises_generic(self):
        """Any Exception subclass raised by logger.debug is swallowed by the guard."""
        log = MagicMock()
        log.debug.side_effect = RuntimeError("logging is shutting down")
        on_signal = _make_signal_handler(log, {})

        with pytest.raises(KeyboardInterrupt):
            on_signal(15, None)

    def test_agent_interrupt_still_fires_when_logger_raises(self):
        """A logging failure must not skip the agent interrupt.

        The point of the grace window is cleaning up the agent's
        subprocess group, so a logging race must not bypass that step.
        """
        log = MagicMock()
        log.debug.side_effect = KeyError(10)
        running_agent = MagicMock()
        state = {"agent": running_agent, "running": True}
        on_signal = _make_signal_handler(log, state)

        with pytest.raises(KeyboardInterrupt):
            on_signal(15, None)

        running_agent.interrupt.assert_called_once_with("received signal 15")

    def test_agent_interrupt_failure_also_does_not_escape(self):
        """Defense-in-depth: an exception from agent.interrupt() is swallowed too."""
        log = MagicMock()
        broken_agent = MagicMock()
        broken_agent.interrupt.side_effect = RuntimeError("agent already torn down")
        state = {"agent": broken_agent, "running": True}
        on_signal = _make_signal_handler(log, state)

        with pytest.raises(KeyboardInterrupt):
            on_signal(15, None)

    def test_base_exception_from_logger_is_not_swallowed(self):
        """BaseException (e.g. SystemExit) still propagates — only Exception is caught.

        The guard uses ``except Exception`` deliberately; BaseException
        subclasses such as SystemExit or a nested KeyboardInterrupt should
        still be honored so real shutdown signals are not masked.
        """
        log = MagicMock()
        log.debug.side_effect = SystemExit(1)
        on_signal = _make_signal_handler(log, {})

        with pytest.raises(SystemExit):
            on_signal(15, None)

View file

@ -0,0 +1,214 @@
"""Tests for the teams_pipeline plugin CLI."""
from __future__ import annotations
import json
from argparse import ArgumentParser, Namespace
from types import SimpleNamespace
import pytest
from plugins.teams_pipeline.cli import register_cli, teams_pipeline_command
from plugins.teams_pipeline.store import TeamsPipelineStore
@pytest.fixture(autouse=True)
def _isolate(tmp_path, monkeypatch):
    """Autouse fixture: point HERMES_HOME at the per-test temporary directory."""
    hermes_home = str(tmp_path)
    monkeypatch.setenv("HERMES_HOME", hermes_home)
def _make_args(**kwargs):
defaults = {
"teams_pipeline_action": None,
"store_path": "",
"status": "",
"limit": 20,
"job_id": "",
"meeting_id": "",
"join_web_url": "",
"tenant_id": "",
"call_record_id": "",
"resource": "",
"notification_url": "",
"change_type": "updated",
"expiration": "",
"client_state": "",
"lifecycle_notification_url": "",
"latest_supported_tls_version": "v1_2",
"subscription_id": "",
"force_refresh": False,
"renew_within_hours": 24,
"extend_hours": 24,
"dry_run": False,
}
defaults.update(kwargs)
return Namespace(**defaults)
def test_register_cli_builds_tree():
    """After register_cli(), parsing ["list"] sets teams_pipeline_action to "list"."""
    cli_parser = ArgumentParser()
    register_cli(cli_parser)

    parsed = cli_parser.parse_args(["list"])

    assert parsed.teams_pipeline_action == "list"
def test_list_prints_recent_jobs(capsys, tmp_path):
    """The "list" action prints the stored job's id and its meeting id."""
    store_file = tmp_path / "teams_pipeline_store.json"
    job_record = {
        "event_id": "evt-1",
        "source_event_type": "updated",
        "dedupe_key": "evt-1",
        "status": "completed",
        "meeting_ref": {"meeting_id": "meeting-1"},
    }
    TeamsPipelineStore(store_file).upsert_job("job-1", job_record)

    cli_args = _make_args(teams_pipeline_action="list", store_path=str(store_file))
    teams_pipeline_command(cli_args)

    printed = capsys.readouterr().out
    assert "job-1" in printed
    assert "meeting-1" in printed
def test_show_prints_job_json(capsys, tmp_path):
    """The "show" action prints the selected job as JSON, including its meeting ref."""
    store_file = tmp_path / "teams_pipeline_store.json"
    job_record = {
        "event_id": "evt-1",
        "source_event_type": "updated",
        "dedupe_key": "evt-1",
        "status": "completed",
        "meeting_ref": {"meeting_id": "meeting-1"},
    }
    TeamsPipelineStore(store_file).upsert_job("job-1", job_record)

    cli_args = _make_args(
        teams_pipeline_action="show",
        job_id="job-1",
        store_path=str(store_file),
    )
    teams_pipeline_command(cli_args)

    # The whole of stdout must parse as a single JSON document.
    shown = json.loads(capsys.readouterr().out)
    assert shown["job_id"] == "job-1"
    assert shown["meeting_ref"]["meeting_id"] == "meeting-1"
def test_fetch_requires_meeting_identifier(capsys):
    """"fetch" with neither meeting_id nor join_web_url prints the requirement message."""
    cli_args = _make_args(teams_pipeline_action="fetch")
    teams_pipeline_command(cli_args)

    printed = capsys.readouterr().out
    assert "meeting_id or join_web_url is required" in printed
def test_subscriptions_lists_graph_subscriptions(monkeypatch, capsys):
    """The "subscriptions" action prints what the Graph client returns for /subscriptions."""
    canned_subscription = {
        "id": "sub-1",
        "resource": "communications/onlineMeetings/getAllTranscripts",
        "changeType": "updated",
        "expirationDateTime": "2026-05-05T00:00:00Z",
    }

    class FakeClient:
        """Stand-in Graph client that serves one canned subscription."""

        async def collect_paginated(self, path):
            assert path == "/subscriptions"
            return [canned_subscription]

    def build_fake_client():
        return FakeClient()

    monkeypatch.setattr("plugins.teams_pipeline.cli.build_graph_client", build_fake_client)

    teams_pipeline_command(_make_args(teams_pipeline_action="subscriptions"))

    printed = capsys.readouterr().out
    assert "sub-1" in printed
    assert "getAllTranscripts" in printed
def test_subscribe_defaults_to_created_for_transcript_resources(monkeypatch, capsys):
    """With an empty change_type, subscribing to a transcript resource sends "created"."""
    request_seen = {}

    class FakeClient:
        """Stand-in Graph client that records the POST and echoes its body back."""

        async def post_json(self, path, json_body=None, headers=None):
            request_seen["path"] = path
            request_seen["json_body"] = json_body
            echoed = {"id": "sub-transcript"}
            for field in ("resource", "changeType", "notificationUrl", "expirationDateTime"):
                echoed[field] = json_body[field]
            return echoed

    def build_fake_client():
        return FakeClient()

    monkeypatch.setattr("plugins.teams_pipeline.cli.build_graph_client", build_fake_client)

    cli_args = _make_args(
        teams_pipeline_action="subscribe",
        resource="communications/onlineMeetings/getAllTranscripts",
        notification_url="https://example.com/webhooks/msgraph",
        change_type="",
    )
    teams_pipeline_command(cli_args)

    printed_payload = json.loads(capsys.readouterr().out)
    assert request_seen["path"] == "/subscriptions"
    assert request_seen["json_body"]["changeType"] == "created"
    assert printed_payload["changeType"] == "created"
def test_token_health_force_refresh(monkeypatch, capsys):
    """"token-health" with force_refresh requests a forced token and reports on it."""
    issued_token = "token-123"

    class FakeProvider:
        """Stand-in token provider with a warm cache and a fixed token."""

        def inspect_token_health(self):
            return {"configured": True, "cache_state": "warm"}

        async def get_access_token(self, force_refresh=False):
            # The CLI flag must be forwarded to the provider.
            assert force_refresh is True
            return issued_token

    def from_env():
        return FakeProvider()

    monkeypatch.setattr(
        "plugins.teams_pipeline.cli.MicrosoftGraphTokenProvider",
        SimpleNamespace(from_env=from_env),
    )

    cli_args = _make_args(teams_pipeline_action="token-health", force_refresh=True)
    teams_pipeline_command(cli_args)

    report = json.loads(capsys.readouterr().out)
    assert report["configured"] is True
    assert report["last_refresh_succeeded"] is True
    assert report["access_token_length"] == len("token-123")
def test_validate_accepts_msgraph_credentials_for_graph_delivery(monkeypatch, capsys, tmp_path):
    """"validate" reports ok with no issues for graph delivery plus MSGRAPH_* credentials."""
    from gateway.config import Platform, PlatformConfig

    credentials = {
        "MSGRAPH_TENANT_ID": "tenant",
        "MSGRAPH_CLIENT_ID": "client",
        "MSGRAPH_CLIENT_SECRET": "secret",
    }
    for env_name, env_value in credentials.items():
        monkeypatch.setenv(env_name, env_value)

    webhook_platform = PlatformConfig(enabled=True, extra={})
    teams_platform = PlatformConfig(
        enabled=True,
        extra={
            "delivery_mode": "graph",
            "team_id": "team-1",
            "channel_id": "channel-1",
        },
    )
    gateway_config = SimpleNamespace(
        platforms={
            Platform.MSGRAPH_WEBHOOK: webhook_platform,
            Platform("teams"): teams_platform,
        }
    )

    def load_fake_config():
        return gateway_config

    monkeypatch.setattr("plugins.teams_pipeline.cli.load_gateway_config", load_fake_config)

    cli_args = _make_args(
        teams_pipeline_action="validate",
        store_path=str(tmp_path / "teams_pipeline_store.json"),
    )
    teams_pipeline_command(cli_args)

    report = json.loads(capsys.readouterr().out)
    assert report["ok"] is True
    assert report["issues"] == []

View file

@ -192,13 +192,19 @@ class TestTencentTokenhubCanonicalProvider:
class TestTencentInOpenRouterAndNous:
"""tencent/hy3-preview:free should appear in OpenRouter and Nous curated lists."""
"""tencent/hy3-preview:free and tencent/hy3-preview should appear in OpenRouter and Nous curated lists."""
def test_in_openrouter_fallback(self):
    """The ``:free`` hy3-preview id is listed in OPENROUTER_MODELS."""
    from hermes_cli.models import OPENROUTER_MODELS

    # Entries are (model_id, <second field>) pairs; only the ids matter here.
    listed_ids = [model_id for model_id, _ in OPENROUTER_MODELS]
    assert "tencent/hy3-preview:free" in listed_ids
def test_paid_in_openrouter_fallback(self):
    """tencent/hy3-preview (paid, no :free suffix) is also in OPENROUTER_MODELS."""
    from hermes_cli.models import OPENROUTER_MODELS

    # Entries are (model_id, <second field>) pairs; only the ids matter here.
    listed_ids = [model_id for model_id, _ in OPENROUTER_MODELS]
    assert "tencent/hy3-preview" in listed_ids
def test_in_nous_provider_models(self):
from hermes_cli.models import _PROVIDER_MODELS
assert "tencent/hy3-preview" in _PROVIDER_MODELS["nous"]
@ -298,12 +304,20 @@ class TestTencentTokenhubURLMapping:
class TestTencentTokenhubContextLength:
"""hy3-preview context length is registered."""
"""hy3-preview has a context-length entry registered.
def test_hy3_preview_context_length(self):
Asserting the relationship (registered + 4096) instead of a
specific value, per AGENTS.md "Don't write change-detector tests".
The previous version of this class pinned an exact integer that
broke whenever Tencent / OpenRouter bumped the published context
window (#22268).
"""
def test_hy3_preview_has_registered_context_length(self):
from agent.model_metadata import get_model_context_length
ctx = get_model_context_length("hy3-preview")
assert ctx == 256000
assert isinstance(ctx, int)
assert ctx >= 4096, f"hy3-preview context length looks unset/wrong: {ctx}"
# =============================================================================
@ -420,7 +434,7 @@ class TestTencentTokenhubCLIDispatch:
class TestTencentTokenhubModelCatalogJSON:
"""Verify tencent/hy3-preview:free is present in the website model-catalog.json."""
"""Verify tencent/hy3-preview:free and tencent/hy3-preview are present in the website model-catalog.json."""
def test_in_model_catalog_json(self):
catalog_path = os.path.join(
@ -445,6 +459,7 @@ class TestTencentTokenhubModelCatalogJSON:
for model in provider_entry.get("models", []):
all_ids.add(model.get("id", ""))
assert "tencent/hy3-preview:free" in all_ids
assert "tencent/hy3-preview" in all_ids
# =============================================================================

View file

@ -2,12 +2,16 @@
from unittest.mock import patch
import pytest
from hermes_cli.tools_config import (
_DEFAULT_OFF_TOOLSETS,
_apply_toolset_change,
_configure_provider,
_reconfigure_provider,
_get_platform_tools,
_platform_toolset_summary,
_reconfigure_tool,
_save_platform_tools,
_toolset_has_keys,
CONFIGURABLE_TOOLSETS,
@ -115,12 +119,79 @@ def test_get_platform_tools_homeassistant_toolset_off_for_cron_when_hass_token_m
assert "homeassistant" not in cron_enabled
def test_get_platform_tools_expands_composite_when_mixed_with_configurable():
    """``[hermes-cli, spotify]`` keeps the full ``hermes-cli`` toolset plus Spotify.

    A composite mixed with a configurable entry must not lose the
    composite: the has_explicit_config branch used to drop ``hermes-cli``,
    leaving sessions with only ``{spotify, kanban}``.
    """
    config = {"platform_toolsets": {"cli": ["hermes-cli", "spotify"]}}
    enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False)

    # The native toolsets must be present again.
    expected_native = (
        "terminal", "file", "web", "browser", "memory", "delegation",
        "code_execution", "todo", "session_search", "skills",
    )
    for toolset in expected_native:
        assert toolset in enabled, f"{toolset} should be enabled when hermes-cli is listed"

    # Spotify was opted into explicitly, so it must survive the
    # _DEFAULT_OFF_TOOLSETS subtraction.
    assert "spotify" in enabled
def test_get_platform_tools_composite_only_unchanged():
    """A composite-only list yields the same toolsets as an empty config.

    Guards against the mixed-list expansion accidentally hijacking the
    composite-only case, which must still take the else-branch path.
    """
    composite_config = {"platform_toolsets": {"cli": ["hermes-cli"]}}
    from_composite = _get_platform_tools(
        composite_config, "cli", include_default_mcp_servers=False
    )
    from_defaults = _get_platform_tools({}, "cli", include_default_mcp_servers=False)

    assert from_composite == from_defaults
def test_get_platform_tools_configurable_only_no_expansion():
    """A configurable-only list must not pull in unrelated toolsets.

    Guards against the expansion firing when ``composite_tools`` is empty.
    """
    config = {"platform_toolsets": {"cli": ["terminal", "file"]}}
    enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False)

    for requested in ("terminal", "file"):
        assert requested in enabled
    # "web" was never requested, so the expansion path must not add it.
    assert "web" not in enabled
def test_get_platform_tools_mixed_does_not_resurrect_default_off():
    """Composite expansion must not re-enable default-off toolsets.

    The implicit pull-in has to subtract _DEFAULT_OFF_TOOLSETS; otherwise
    ``hermes-cli`` expansion would re-enable ``moa`` / ``rl`` /
    ``homeassistant`` for users who never opted in.
    """
    config = {"platform_toolsets": {"cli": ["hermes-cli", "terminal"]}}
    enabled = _get_platform_tools(config, "cli", include_default_mcp_servers=False)

    assert "terminal" in enabled
    for default_off in ("moa", "rl"):
        assert default_off not in enabled
def test_get_platform_tools_preserves_explicit_empty_selection():
    """An explicit empty selection leaves every configurable toolset disabled.

    The result is deliberately NOT required to be the empty set: the
    stale ``assert enabled == set()`` was removed because it contradicts
    the contract described below.
    """
    config = {"platform_toolsets": {"cli": []}}
    enabled = _get_platform_tools(config, "cli")
    # An explicit empty list disables every CONFIGURABLE toolset (web,
    # terminal, memory, …). Non-configurable platform toolsets that ride
    # along on the platform's default composite (e.g. `kanban`, whose tools
    # live in _HERMES_CORE_TOOLS but aren't user-toggleable) are still
    # auto-recovered by _get_platform_tools so saving via `hermes tools`
    # doesn't silently drop them. The contract this test guards is the
    # configurable side: nothing the user could have checked in the TUI
    # checklist should reappear here.
    configurable = {ts_key for ts_key, _, _ in CONFIGURABLE_TOOLSETS}
    assert enabled.isdisjoint(configurable)
def test_apply_toolset_change_from_default_does_not_enable_default_off_toolsets():
@ -459,6 +530,33 @@ def test_local_browser_provider_is_saved_explicitly(monkeypatch):
assert config["browser"]["cloud_provider"] == "local"
def test_reconfigure_lists_enabled_web_without_existing_provider_config(monkeypatch):
    """An enabled ``web`` toolset is offered for reconfiguration even with no keys set."""
    config = {"platform_toolsets": {"cli": ["web"]}}
    offered = {}
    reconfigured = []

    def no_keys(ts_key, config=None):
        # Pretend no toolset has provider credentials configured.
        return False

    def fake_prompt_choice(question, choices, default=0):
        # Record what was offered, then pick the first entry.
        offered["choices"] = choices
        return 0

    def record_reconfig(ts_key, cat, config):
        reconfigured.append(ts_key)

    monkeypatch.setattr("hermes_cli.tools_config._toolset_has_keys", no_keys)
    monkeypatch.setattr("hermes_cli.tools_config._prompt_choice", fake_prompt_choice)
    monkeypatch.setattr(
        "hermes_cli.tools_config._configure_tool_category_for_reconfig",
        record_reconfig,
    )
    monkeypatch.setattr("hermes_cli.tools_config.save_config", lambda config: None)

    _reconfigure_tool(config)

    assert any("Web Search" in label for label in offered["choices"])
    assert reconfigured == ["web"]
def test_first_install_nous_auto_configures_managed_defaults(monkeypatch):
monkeypatch.setattr("hermes_cli.tools_config.managed_nous_tools_enabled", lambda: True)
monkeypatch.setattr("hermes_cli.nous_subscription.managed_nous_tools_enabled", lambda: True)
@ -861,3 +959,27 @@ def test_get_effective_configurable_toolsets_dedupes_bundled_plugins():
assert len(spotify_rows) == 1, spotify_rows
# Built-in label wins over the plugin label.
assert spotify_rows[0][1] == "🎵 Spotify"
@pytest.mark.parametrize(
    ("provider", "config_key", "expected"),
    [
        # Provider dict carries ``managed_nous_feature`` -> use_gateway is True.
        (
            {"name": "T", "tts_provider": "elevenlabs", "managed_nous_feature": "tts", "env_vars": []},
            "tts",
            True,
        ),
        (
            {"name": "B", "browser_provider": "browserbase", "managed_nous_feature": "browser", "env_vars": []},
            "browser",
            True,
        ),
        (
            {"name": "W", "web_backend": "tavily", "managed_nous_feature": "web", "env_vars": []},
            "web",
            True,
        ),
        # Same providers without ``managed_nous_feature`` -> use_gateway is False.
        (
            {"name": "T", "tts_provider": "elevenlabs", "env_vars": []},
            "tts",
            False,
        ),
        (
            {"name": "B", "browser_provider": "browserbase", "env_vars": []},
            "browser",
            False,
        ),
        (
            {"name": "W", "web_backend": "tavily", "env_vars": []},
            "web",
            False,
        ),
    ],
)
def test_reconfigure_provider_syncs_use_gateway(provider, config_key, expected):
    """``_reconfigure_provider`` writes ``use_gateway`` into the matching config section."""
    fresh_config = {}
    _reconfigure_provider(provider, fresh_config)
    section = fresh_config[config_key]
    assert section["use_gateway"] is expected
def test_reconfigure_browser_provider_overwrites_stale_use_gateway():
    """A leftover ``use_gateway=True`` is reset when a self-hosted provider is chosen."""
    # Start from a config that still carries the managed provider's flag.
    stale_config = {
        "browser": {"cloud_provider": "managed-browser", "use_gateway": True},
    }
    self_hosted = {
        "name": "Browserbase",
        "browser_provider": "browserbase",
        "env_vars": [],
    }

    _reconfigure_provider(self_hosted, stale_config)

    assert stale_config["browser"]["use_gateway"] is False

View file

@ -69,6 +69,39 @@ def test_no_install_when_only_optional_peer_package_missing_from_hidden_lock(tmp
assert main_mod._tui_need_npm_install(tmp_path) is False
def test_no_install_when_only_peer_annotation_differs(tmp_path: Path, main_mod) -> None:
    """A missing ``peer`` flag in the hidden lock must not force a reinstall.

    npm 9 drops the ``peer`` flag from the hidden lock on dev-deps that are
    *also* declared as peers. The package is still installed at the requested
    version, so the difference is cosmetic.

    Regression for the TUI-in-Docker failure where 16 such mismatches caused
    `Installing TUI dependencies` EACCES on every launch.
    """
    _touch_ink(tmp_path)

    # Declared lock: the entry carries both "dev" and "peer".
    declared_lock = (
        '{"packages":{'
        '"node_modules/foo":{"version":"1.0.0","dev":true,"peer":true,"resolved":"https://x/foo.tgz"}'
        '}}'
    )
    # Hidden lock: identical entry except the "peer" flag is gone.
    hidden_lock = (
        '{"packages":{'
        '"node_modules/foo":{"version":"1.0.0","dev":true,"resolved":"https://x/foo.tgz"}'
        '}}'
    )
    (tmp_path / "package-lock.json").write_text(declared_lock)
    (tmp_path / "node_modules" / ".package-lock.json").write_text(hidden_lock)

    assert main_mod._tui_need_npm_install(tmp_path) is False
def test_install_when_version_differs_even_with_peer_drop(tmp_path: Path, main_mod) -> None:
    """Tolerating a dropped ``peer`` flag must not hide a genuine version mismatch."""
    _touch_ink(tmp_path)

    declared_lock = tmp_path / "package-lock.json"
    hidden_lock = tmp_path / "node_modules" / ".package-lock.json"

    # Declared lock asks for 2.0.0; the hidden lock records 1.0.0 installed.
    declared_lock.write_text(
        '{"packages":{"node_modules/foo":{"version":"2.0.0","dev":true,"peer":true}}}'
    )
    hidden_lock.write_text(
        '{"packages":{"node_modules/foo":{"version":"1.0.0","dev":true}}}'
    )

    assert main_mod._tui_need_npm_install(tmp_path) is True
def test_no_install_when_lock_older_than_marker(tmp_path: Path, main_mod) -> None:
_touch_ink(tmp_path)
(tmp_path / "package-lock.json").write_text("{}")

View file

@ -36,7 +36,14 @@ def test_cmd_chat_tui_continue_uses_latest_tui_session(monkeypatch, main_mod):
calls.append(source)
return "20260408_235959_a1b2c3" if source == "tui" else None
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
def fake_launch(
resume_session_id=None,
tui_dev=False,
model=None,
provider=None,
toolsets=None,
**kwargs,
):
captured["resume"] = resume_session_id
raise SystemExit(0)
@ -63,7 +70,14 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
return "20260408_235959_d4e5f6"
return None
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
def fake_launch(
resume_session_id=None,
tui_dev=False,
model=None,
provider=None,
toolsets=None,
**kwargs,
):
captured["resume"] = resume_session_id
raise SystemExit(0)
@ -81,7 +95,14 @@ def test_cmd_chat_tui_continue_falls_back_to_latest_cli_session(monkeypatch, mai
def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod):
captured = {}
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
def fake_launch(
resume_session_id=None,
tui_dev=False,
model=None,
provider=None,
toolsets=None,
**kwargs,
):
captured["resume"] = resume_session_id
raise SystemExit(0)
@ -99,7 +120,14 @@ def test_cmd_chat_tui_resume_resolves_title_before_launch(monkeypatch, main_mod)
def test_cmd_chat_tui_passes_model_and_provider(monkeypatch, main_mod):
captured = {}
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
def fake_launch(
resume_session_id=None,
tui_dev=False,
model=None,
provider=None,
toolsets=None,
**kwargs,
):
captured.update(
{
"model": model,
@ -130,7 +158,14 @@ def test_cmd_chat_tui_passes_model_and_provider(monkeypatch, main_mod):
def test_cmd_chat_tui_passes_toolsets(monkeypatch, main_mod):
captured = {}
def fake_launch(resume_session_id=None, tui_dev=False, model=None, provider=None, toolsets=None):
def fake_launch(
resume_session_id=None,
tui_dev=False,
model=None,
provider=None,
toolsets=None,
**kwargs,
):
captured["toolsets"] = toolsets
raise SystemExit(0)
@ -142,22 +177,74 @@ def test_cmd_chat_tui_passes_toolsets(monkeypatch, main_mod):
assert captured["toolsets"] == "web,terminal"
def test_cmd_chat_tui_forwards_chat_flags(monkeypatch, main_mod):
    """``cmd_chat`` passes the chat flags on to ``_launch_tui`` as keyword arguments."""
    forwarded = {}

    def fake_launch(resume_session_id=None, **kwargs):
        # Record everything the launcher received, then stop before a real launch.
        forwarded["resume_session_id"] = resume_session_id
        forwarded.update(kwargs)
        raise SystemExit(0)

    monkeypatch.setattr(main_mod, "_launch_tui", fake_launch)

    chat_flags = {
        "skills": ["foo,bar"],
        "verbose": True,
        "quiet": True,
        "query": "hello",
        "image": "/tmp/cat.png",
        "worktree": True,
        "checkpoints": True,
        "pass_session_id": True,
        "max_turns": 7,
        "accept_hooks": True,
    }
    with pytest.raises(SystemExit):
        main_mod.cmd_chat(_args(**chat_flags))

    assert forwarded["skills"] == ["foo,bar"]
    assert forwarded["query"] == "hello"
    assert forwarded["image"] == "/tmp/cat.png"
    assert forwarded["max_turns"] == 7
    for switch in (
        "verbose",
        "quiet",
        "worktree",
        "checkpoints",
        "pass_session_id",
        "accept_hooks",
    ):
        assert forwarded[switch] is True
def test_main_top_level_tui_accepts_toolsets(monkeypatch, main_mod):
captured = {}
import hermes_cli.config as config_mod
monkeypatch.setattr(sys, "argv", ["hermes", "--tui", "--toolsets", "web,terminal"])
monkeypatch.setitem(sys.modules, "hermes_cli.plugins", types.SimpleNamespace(discover_plugins=lambda: None))
monkeypatch.setitem(sys.modules, "tools.mcp_tool", types.SimpleNamespace(discover_mcp_tools=lambda: None))
monkeypatch.setitem(
sys.modules,
"hermes_cli.plugins",
types.SimpleNamespace(discover_plugins=lambda: None),
)
monkeypatch.setitem(
sys.modules,
"tools.mcp_tool",
types.SimpleNamespace(discover_mcp_tools=lambda: None),
)
monkeypatch.setattr(config_mod, "load_config", lambda: {})
monkeypatch.setattr(config_mod, "get_container_exec_info", lambda: None)
monkeypatch.setitem(
sys.modules,
"agent.shell_hooks",
types.SimpleNamespace(register_from_config=lambda _cfg, accept_hooks=False: None),
types.SimpleNamespace(
register_from_config=lambda _cfg, accept_hooks=False: None
),
)
monkeypatch.setattr(
main_mod,
"cmd_chat",
lambda args: captured.update({"toolsets": args.toolsets, "tui": args.tui}),
)
monkeypatch.setattr(main_mod, "cmd_chat", lambda args: captured.update({"toolsets": args.toolsets, "tui": args.tui}))
main_mod.main()
@ -169,27 +256,49 @@ def test_main_top_level_oneshot_accepts_toolsets(monkeypatch, main_mod):
import hermes_cli.config as config_mod
monkeypatch.setattr(sys, "argv", ["hermes", "-z", "hello", "--toolsets", "web,terminal"])
monkeypatch.setitem(sys.modules, "hermes_cli.plugins", types.SimpleNamespace(discover_plugins=lambda: None))
monkeypatch.setitem(sys.modules, "tools.mcp_tool", types.SimpleNamespace(discover_mcp_tools=lambda: None))
monkeypatch.setattr(
sys, "argv", ["hermes", "-z", "hello", "--toolsets", "web,terminal"]
)
monkeypatch.setitem(
sys.modules,
"hermes_cli.plugins",
types.SimpleNamespace(discover_plugins=lambda: None),
)
monkeypatch.setitem(
sys.modules,
"tools.mcp_tool",
types.SimpleNamespace(discover_mcp_tools=lambda: None),
)
monkeypatch.setattr(config_mod, "load_config", lambda: {})
monkeypatch.setattr(config_mod, "get_container_exec_info", lambda: None)
monkeypatch.setitem(
sys.modules,
"agent.shell_hooks",
types.SimpleNamespace(register_from_config=lambda _cfg, accept_hooks=False: None),
types.SimpleNamespace(
register_from_config=lambda _cfg, accept_hooks=False: None
),
)
monkeypatch.setitem(
sys.modules,
"hermes_cli.oneshot",
types.SimpleNamespace(run_oneshot=lambda prompt, **kwargs: captured.update({"prompt": prompt, **kwargs}) or 0),
types.SimpleNamespace(
run_oneshot=lambda prompt, **kwargs: captured.update(
{"prompt": prompt, **kwargs}
)
or 0
),
)
with pytest.raises(SystemExit) as exc:
main_mod.main()
assert exc.value.code == 0
assert captured == {"prompt": "hello", "model": None, "provider": None, "toolsets": "web,terminal"}
assert captured == {
"prompt": "hello",
"model": None,
"provider": None,
"toolsets": "web,terminal",
}
def _stub_plugin_discovery(monkeypatch):
@ -256,7 +365,9 @@ def test_oneshot_accepts_plugin_toolset_after_discovery(monkeypatch):
monkeypatch.setitem(
sys.modules,
"hermes_cli.plugins",
types.SimpleNamespace(discover_plugins=lambda: discovered.update({"ready": True})),
types.SimpleNamespace(
discover_plugins=lambda: discovered.update({"ready": True})
),
)
valid, error = _validate_explicit_toolsets("plugin_demo")
@ -308,6 +419,72 @@ def test_oneshot_distinguishes_disabled_mcp_from_unknown(monkeypatch, capsys):
assert "mcp-off" in err
def test_oneshot_wires_session_db_for_recall(monkeypatch):
    """``hermes -z`` skips HermesCLI, yet the agent must still be given a SessionDB.

    Every module ``_run_agent`` is expected to import is replaced with a stub
    in ``sys.modules``; the test then checks which keyword arguments reach the
    fake agent's constructor.
    """
    from hermes_cli.oneshot import _run_agent

    agent_kwargs = {}
    sentinel_db = object()

    class FakeAgent:
        def __init__(self, **kwargs):
            agent_kwargs.update(kwargs)
            self.suppress_status_output = False
            self.stream_delta_callback = object()
            self.tool_gen_callback = object()

        def chat(self, prompt):
            agent_kwargs["prompt"] = prompt
            return "ok"

    class FakeSessionDB:
        # Constructing the class yields the sentinel so identity can be asserted.
        def __new__(cls):
            return sentinel_db

    def stub_module(name, **attrs):
        module = types.ModuleType(name)
        vars(module).update(attrs)
        return module

    runtime = {
        "api_key": "k",
        "base_url": "u",
        "provider": "p",
        "api_mode": "chat_completions",
        "credential_pool": None,
    }
    stubs = [
        stub_module("run_agent", AIAgent=FakeAgent),
        stub_module("hermes_state", SessionDB=FakeSessionDB),
        stub_module(
            "hermes_cli.config",
            load_config=lambda: {"model": {"default": "m"}},
        ),
        stub_module(
            "hermes_cli.models",
            detect_provider_for_model=lambda *_args, **_kwargs: None,
        ),
        stub_module(
            "hermes_cli.runtime_provider",
            resolve_runtime_provider=lambda **_kwargs: dict(runtime),
        ),
        stub_module(
            "hermes_cli.tools_config",
            _get_platform_tools=lambda *_args, **_kwargs: {"session_search"},
        ),
    ]
    for stub in stubs:
        monkeypatch.setitem(sys.modules, stub.__name__, stub)

    reply = _run_agent("recall this")

    assert reply == "ok"
    assert agent_kwargs["session_db"] is sentinel_db
    assert agent_kwargs["enabled_toolsets"] == ["session_search"]
    assert agent_kwargs["prompt"] == "recall this"
def test_launch_tui_exports_model_provider_and_toolsets(monkeypatch, main_mod):
captured = {}
active_path_during_call = None
@ -328,7 +505,9 @@ def test_launch_tui_exports_model_provider_and_toolsets(monkeypatch, main_mod):
monkeypatch.setattr(main_mod.subprocess, "call", fake_call)
with pytest.raises(SystemExit):
main_mod._launch_tui(model="nous/hermes-test", provider="nous", toolsets="web, terminal")
main_mod._launch_tui(
model="nous/hermes-test", provider="nous", toolsets="web, terminal"
)
env = captured["env"]
assert env["HERMES_MODEL"] == "nous/hermes-test"

View file

@ -311,7 +311,8 @@ def test_cmd_update_retries_optional_extras_individually_when_all_fails(monkeypa
"""When .[all] fails, update should keep base deps and retry extras individually."""
_setup_update_mocks(monkeypatch, tmp_path)
monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
monkeypatch.setattr(hermes_main, "_load_installable_optional_extras", lambda: ["matrix", "mcp"])
monkeypatch.setattr(hermes_main, "_is_termux_env", lambda env=None: False)
monkeypatch.setattr(hermes_main, "_load_installable_optional_extras", lambda group="all": ["matrix", "mcp"])
recorded = []
@ -323,15 +324,15 @@ def test_cmd_update_retries_optional_extras_individually_when_all_fails(monkeypa
return SimpleNamespace(stdout="main\n", stderr="", returncode=0)
if cmd == ["git", "rev-list", "HEAD..origin/main", "--count"]:
return SimpleNamespace(stdout="1\n", stderr="", returncode=0)
if cmd == ["git", "pull", "origin", "main"]:
if cmd == ["git", "pull", "--ff-only", "origin", "main"]:
return SimpleNamespace(stdout="Updating\n", stderr="", returncode=0)
if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[all]", "--quiet"]:
if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[all]"]:
raise CalledProcessError(returncode=1, cmd=cmd)
if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".", "--quiet"]:
if cmd == ["/usr/bin/uv", "pip", "install", "-e", "."]:
return SimpleNamespace(returncode=0)
if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[matrix]", "--quiet"]:
if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[matrix]"]:
raise CalledProcessError(returncode=1, cmd=cmd)
if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[mcp]", "--quiet"]:
if cmd == ["/usr/bin/uv", "pip", "install", "-e", ".[mcp]"]:
return SimpleNamespace(returncode=0)
# Catch-all must include stdout/stderr so consumers that parse
# output (e.g. the dashboard-restart `ps -A` scan added in the
@ -344,10 +345,10 @@ def test_cmd_update_retries_optional_extras_individually_when_all_fails(monkeypa
install_cmds = [c for c in recorded if "pip" in c and "install" in c]
assert install_cmds == [
["/usr/bin/uv", "pip", "install", "-e", ".[all]", "--quiet"],
["/usr/bin/uv", "pip", "install", "-e", ".", "--quiet"],
["/usr/bin/uv", "pip", "install", "-e", ".[matrix]", "--quiet"],
["/usr/bin/uv", "pip", "install", "-e", ".[mcp]", "--quiet"],
["/usr/bin/uv", "pip", "install", "-e", ".[all]"],
["/usr/bin/uv", "pip", "install", "-e", "."],
["/usr/bin/uv", "pip", "install", "-e", ".[matrix]"],
["/usr/bin/uv", "pip", "install", "-e", ".[mcp]"],
]
out = capsys.readouterr().out
@ -360,6 +361,7 @@ def test_cmd_update_succeeds_with_extras(monkeypatch, tmp_path):
"""When .[all] succeeds, no fallback should be attempted."""
_setup_update_mocks(monkeypatch, tmp_path)
monkeypatch.setattr("shutil.which", lambda name: "/usr/bin/uv" if name == "uv" else None)
monkeypatch.setattr(hermes_main, "_is_termux_env", lambda env=None: False)
recorded = []
@ -371,7 +373,7 @@ def test_cmd_update_succeeds_with_extras(monkeypatch, tmp_path):
return SimpleNamespace(stdout="main\n", stderr="", returncode=0)
if cmd == ["git", "rev-list", "HEAD..origin/main", "--count"]:
return SimpleNamespace(stdout="1\n", stderr="", returncode=0)
if cmd == ["git", "pull", "origin", "main"]:
if cmd == ["git", "pull", "--ff-only", "origin", "main"]:
return SimpleNamespace(stdout="Updating\n", stderr="", returncode=0)
return SimpleNamespace(returncode=0, stdout="", stderr="")
@ -384,6 +386,54 @@ def test_cmd_update_succeeds_with_extras(monkeypatch, tmp_path):
assert ".[all]" in install_cmds[0]
def test_install_with_optional_fallback_honors_custom_group(monkeypatch):
    """With ``group="termux-all"`` the installer targets ``.[termux-all]`` first.

    The grouped install is made to fail, so the expected sequence is the
    grouped attempt, the bare package, then each extra from that group.
    """
    attempted = []

    def fake_optional_extras(group="all"):
        return ["termux", "mcp"] if group == "termux-all" else []

    def fake_run_with_heartbeat(cmd, **kwargs):
        attempted.append(cmd)
        # Only the combined-group install fails; every other install succeeds.
        if cmd[-1] == ".[termux-all]":
            raise CalledProcessError(returncode=1, cmd=cmd)
        return None

    monkeypatch.setattr(
        hermes_main, "_load_installable_optional_extras", fake_optional_extras
    )
    monkeypatch.setattr(
        hermes_main, "_run_install_with_heartbeat", fake_run_with_heartbeat
    )

    hermes_main._install_python_dependencies_with_optional_fallback(
        ["/usr/bin/uv", "pip"],
        group="termux-all",
    )

    install_prefix = ["/usr/bin/uv", "pip", "install", "-e"]
    expected_targets = (".[termux-all]", ".", ".[termux]", ".[mcp]")
    assert attempted == [install_prefix + [target] for target in expected_targets]
def test_install_heartbeat_prints_when_dependency_install_is_silent(monkeypatch, capsys):
    """An install that outlasts the heartbeat interval prints a progress line."""
    install_cmd = ["uv", "pip", "install", "-e", "."]

    def slow_silent_run(cmd, **kwargs):
        # Sleep past the 1-second interval used below without printing anything.
        hermes_main._time.sleep(1.2)
        return SimpleNamespace(returncode=0)

    monkeypatch.setattr(hermes_main.subprocess, "run", slow_silent_run)

    hermes_main._run_install_with_heartbeat(
        install_cmd,
        heartbeat_interval_seconds=1,
    )

    printed = capsys.readouterr().out
    assert "still installing dependencies" in printed
# ---------------------------------------------------------------------------
# ff-only fallback to reset --hard on diverged history
# ---------------------------------------------------------------------------

View file

@ -392,6 +392,91 @@ class TestCmdUpdateLaunchdRestart:
captured = capsys.readouterr().out
assert "Restart manually: hermes gateway run" in captured
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_restarts_profile_manual_gateways(
    self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
):
    """A manually started gateway that maps to a profile is relaunched by the update."""
    monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
    monkeypatch.setattr(
        gateway_cli,
        "get_launchd_plist_path",
        lambda: tmp_path / "ai.hermes.gateway.plist",
    )
    mock_run.side_effect = _make_run_side_effect(
        commit_count="3",
        launchctl_loaded=False,
    )

    profile_dir = tmp_path / ".hermes" / "profiles" / "coder"
    coder_gateway = gateway_cli.ProfileGatewayProcess(
        profile="coder",
        path=profile_dir,
        pid=12345,
    )

    # ``find_gateway_pids`` runs twice: first to list the manual PIDs to
    # restart, then again ~3s later from the post-restart survivor sweep
    # (#17648). The second answer is empty to model the process having
    # exited after the graceful restart; if 12345 were still reported the
    # sweep would SIGKILL it and ``kill.assert_not_called()`` would fail.
    pid_answers = [[12345], []]
    with patch.object(gateway_cli, "find_gateway_pids", side_effect=pid_answers), \
         patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[coder_gateway]), \
         patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as relaunch, \
         patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=True) as drain, \
         patch("os.kill") as os_kill:
        cmd_update(mock_args)

    output = capsys.readouterr().out
    relaunch.assert_called_once_with("coder", 12345)
    drain.assert_called_once()
    # The graceful drain reported success, so no signal should have been sent.
    os_kill.assert_not_called()
    assert "Restarting manual gateway profile(s): coder" in output
    assert "Restart manually: hermes gateway run" not in output
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_profile_manual_gateway_falls_back_to_sigterm(
    self, mock_run, _mock_which, mock_args, capsys, tmp_path, monkeypatch,
):
    """If the SIGUSR1 drain reports failure, the profile gateway is signalled via ``os.kill``."""
    monkeypatch.setattr(gateway_cli, "is_macos", lambda: True)
    monkeypatch.setattr(
        gateway_cli,
        "get_launchd_plist_path",
        lambda: tmp_path / "ai.hermes.gateway.plist",
    )
    mock_run.side_effect = _make_run_side_effect(
        commit_count="3",
        launchctl_loaded=False,
    )

    profile_dir = tmp_path / ".hermes" / "profiles" / "coder"
    coder_gateway = gateway_cli.ProfileGatewayProcess(
        profile="coder",
        path=profile_dir,
        pid=12345,
    )

    # The post-restart survivor sweep (#17648) asks ``find_gateway_pids``
    # again ~3s after the restart attempt. An empty second answer keeps the
    # sweep from escalating the SIGTERM fallback to SIGKILL.
    pid_answers = [[12345], []]
    with patch.object(gateway_cli, "find_gateway_pids", side_effect=pid_answers), \
         patch.object(gateway_cli, "find_profile_gateway_processes", return_value=[coder_gateway]), \
         patch.object(gateway_cli, "launch_detached_profile_gateway_restart", return_value=True) as relaunch, \
         patch.object(gateway_cli, "_graceful_restart_via_sigusr1", return_value=False) as drain, \
         patch("os.kill") as os_kill:
        cmd_update(mock_args)

    output = capsys.readouterr().out
    relaunch.assert_called_once_with("coder", 12345)
    drain.assert_called_once()
    # The drain returned False, so exactly one fallback signal is expected.
    os_kill.assert_called_once()
    assert "Restarting manual gateway profile(s): coder" in output
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_with_systemd_still_restarts_via_systemd(
@ -568,6 +653,77 @@ class TestCmdUpdateLaunchdRestart:
"Drain path failed; expected fallback `systemctl restart`."
)
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_bypasses_restartsec_after_graceful_drain(
    self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
):
    """A successful SIGUSR1 drain is followed by explicit ``reset-failed`` + ``start``.

    Rather than passively waiting for systemd's auto-restart, ``cmd_update``
    must issue both commands so the unit's ``RestartSec`` cooldown (default
    60s on our unit file) is bypassed. That collapses the post-drain delay
    of a voluntary restart from ~60s to ~5s.
    """
    monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
    monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
    monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)

    def side_effect(cmd, **kwargs):
        def done(stdout=""):
            return subprocess.CompletedProcess(cmd, 0, stdout=stdout, stderr="")

        joined = " ".join(str(c) for c in cmd)
        is_systemctl = "systemctl" in joined
        if "rev-parse" in joined and "--abbrev-ref" in joined:
            return done("main\n")
        if "rev-parse" in joined and "--verify" in joined:
            return done()
        if "rev-list" in joined:
            return done("3\n")
        if is_systemctl and "list-units" in joined:
            # Only the --user scope reports a running gateway unit.
            if "--user" in joined:
                return done("hermes-gateway.service loaded active running\n")
            return done()
        if is_systemctl and "is-active" in joined:
            return done("active\n")
        if is_systemctl and "show" in joined and "MainPID" in joined:
            return done("4242\n")
        return done()

    mock_run.side_effect = side_effect

    # Report a successful graceful drain so cmd_update reaches the
    # post-drain restart bypass.
    monkeypatch.setattr(
        "hermes_cli.gateway._graceful_restart_via_sigusr1",
        lambda pid, drain_timeout: True,
    )

    with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
        cmd_update(mock_args)

    every_command = (
        " ".join(str(a) for a in c.args[0]) for c in mock_run.call_args_list
    )
    calls = [text for text in every_command if "systemctl" in text]

    # Both ``reset-failed hermes-gateway`` and a plain ``start
    # hermes-gateway`` must have been issued; ``restart`` does not count
    # as ``start``.
    reset_calls = [c for c in calls if "reset-failed" in c and "hermes-gateway" in c]
    start_calls = [
        c for c in calls
        if "start" in c and "hermes-gateway" in c and "restart" not in c
    ]
    assert reset_calls, (
        f"Expected explicit `reset-failed hermes-gateway` after graceful drain; "
        f"systemctl calls were: {calls}"
    )
    assert start_calls, (
        f"Expected explicit `start hermes-gateway` after graceful drain to "
        f"bypass RestartSec; systemctl calls were: {calls}"
    )
@patch("shutil.which", return_value=None)
@patch("subprocess.run")
def test_update_no_gateway_running_skips_restart(
@ -797,15 +953,25 @@ class TestServicePidExclusion:
launchctl_loaded=True,
)
# Survivor sweep (#17648) re-queries ``find_gateway_pids`` after
# SIGTERM. ``os.kill`` is mocked, so the PID never "dies" — track
# the killed-via-SIGTERM PIDs ourselves and exclude them on later
# calls to simulate the OS reaping the process. Without this the
# sweep escalates with SIGKILL and ``manual_kills == 2`` instead of 1.
_killed_pids: set[int] = set()
def fake_find(exclude_pids=None, all_profiles=False):
_exclude = exclude_pids or set()
_exclude = (exclude_pids or set()) | _killed_pids
return [p for p in [SERVICE_PID, MANUAL_PID] if p not in _exclude]
def fake_kill(pid, _sig):
_killed_pids.add(pid)
with patch.object(
gateway_cli, "_get_service_pids", return_value={SERVICE_PID}
), patch.object(
gateway_cli, "find_gateway_pids", side_effect=fake_find,
), patch("os.kill") as mock_kill:
), patch("os.kill", side_effect=fake_kill) as mock_kill:
cmd_update(mock_args)
captured = capsys.readouterr().out
@ -1261,3 +1427,232 @@ class TestCmdUpdateLegacyGatewayWarning:
assert "Legacy Hermes gateway" in captured
assert "(system scope)" in captured
assert "sudo" in captured
# ---------------------------------------------------------------------------
# cmd_update — reset-failed precedes systemctl restart on fallback path
# ---------------------------------------------------------------------------
def _systemctl_calls(mock_run, subcommand):
"""Return every subprocess.run call that was `systemctl [--user] <subcommand>`."""
out = []
for call in mock_run.call_args_list:
argv = call.args[0]
joined = " ".join(str(c) for c in argv)
if "systemctl" in joined and subcommand in joined:
out.append(argv)
return out
class TestCmdUpdateResetFailedBeforeRestart:
    """`hermes update` must run `systemctl reset-failed` ahead of every
    fallback `systemctl restart`.

    A unit that systemd parked in the `failed` state after earlier
    auto-restart crashes (CHDIR, OOM, filesystem race) would otherwise stay
    stranded permanently.

    Mirrors the recovery pattern `hermes gateway restart` (systemd_restart)
    adopted in PR #20949. Without it, users hit "gateway never comes back
    after update" until they manually run `systemctl reset-failed`.
    """

    @staticmethod
    def _pretend_linux_systemd(monkeypatch):
        """Make ``gateway_cli`` report a non-macOS, non-Termux host with systemd."""
        monkeypatch.setattr(gateway_cli, "is_macos", lambda: False)
        monkeypatch.setattr(gateway_cli, "supports_systemd_services", lambda: True)
        monkeypatch.setattr(gateway_cli, "is_termux", lambda: False)

    @staticmethod
    def _make_drain_fail(monkeypatch):
        """Make the graceful SIGUSR1 restart report failure."""
        monkeypatch.setattr(
            "hermes_cli.gateway._graceful_restart_via_sigusr1",
            lambda pid, drain_timeout: False,
        )

    @staticmethod
    def _unit_dies_after_restart():
        """Build a ``subprocess.run`` stand-in for a unit that never stays up.

        The first ``is-active`` probe answers ``active`` so the restart path
        is entered; every later probe answers ``inactive`` with exit code 3.
        """
        probes = {"n": 0}

        def fake_run(cmd, **kwargs):
            def done(stdout="", code=0):
                return subprocess.CompletedProcess(cmd, code, stdout=stdout, stderr="")

            joined = " ".join(str(c) for c in cmd)
            is_systemctl = "systemctl" in joined
            if "rev-parse" in joined and "--abbrev-ref" in joined:
                return done("main\n")
            if "rev-parse" in joined and "--verify" in joined:
                return done()
            if "rev-list" in joined:
                return done("3\n")
            if is_systemctl and "list-units" in joined:
                # Only the --user scope reports a running gateway unit.
                if "--user" in joined:
                    return done("hermes-gateway.service loaded active running\n")
                return done()
            if is_systemctl and "is-active" in joined:
                probes["n"] += 1
                if probes["n"] == 1:
                    return done("active\n")
                return done("inactive\n", code=3)
            if is_systemctl and "show" in joined and "MainPID" in joined:
                return done("4242\n")
            return done()

        return fake_run

    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_reset_failed_runs_before_fallback_restart(
        self, mock_run, _mock_which, mock_args, monkeypatch,
    ):
        """When the SIGUSR1 drain fails, the fallback `systemctl restart`
        MUST be preceded by a `reset-failed` call against the same unit."""
        self._pretend_linux_systemd(monkeypatch)

        baseline = _make_run_side_effect(
            commit_count="3",
            systemd_active=True,
        )

        def with_main_pid(cmd, **kwargs):
            # Answer the MainPID query with a fixed PID; delegate the rest.
            joined = " ".join(str(c) for c in cmd)
            if "systemctl" in joined and "show" in joined and "MainPID" in joined:
                return subprocess.CompletedProcess(cmd, 0, stdout="4242\n", stderr="")
            return baseline(cmd, **kwargs)

        mock_run.side_effect = with_main_pid

        # A failing graceful drain pushes cmd_update onto the systemctl
        # restart fallback.
        self._make_drain_fail(monkeypatch)

        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
            cmd_update(mock_args)

        reset_calls = _systemctl_calls(mock_run, "reset-failed")
        restart_calls = _systemctl_calls(mock_run, "restart")

        assert any(
            "hermes-gateway" in " ".join(str(c) for c in call)
            for call in reset_calls
        ), (
            "Expected `systemctl reset-failed hermes-gateway` before the "
            "fallback `systemctl restart`, got reset_calls=%r" % (reset_calls,)
        )
        assert restart_calls, "Fallback systemctl restart should still run"

        # Order check: the first reset-failed must come before the first restart.
        rendered = [
            " ".join(str(c) for c in call.args[0])
            for call in mock_run.call_args_list
        ]
        first_reset_idx = next(
            (
                idx for idx, text in enumerate(rendered)
                if "systemctl" in text and "reset-failed" in text
            ),
            None,
        )
        first_restart_idx = next(
            (
                idx for idx, text in enumerate(rendered)
                if "systemctl" in text and "restart" in text and "hermes-gateway" in text
            ),
            None,
        )
        assert first_reset_idx is not None and first_restart_idx is not None
        assert first_reset_idx < first_restart_idx, (
            f"reset-failed (call #{first_reset_idx}) must precede "
            f"restart (call #{first_restart_idx}) so the unit isn't "
            "blocked by systemd's failed-state backoff."
        )

    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_reset_failed_also_runs_before_retry_restart(
        self, mock_run, _mock_which, mock_args, monkeypatch,
    ):
        """If the first fallback restart spawns a process that dies
        immediately (is-active stays inactive), the retry restart must
        ALSO be preceded by a reset-failed; otherwise the retry races the
        unit's own failed-state transition."""
        self._pretend_linux_systemd(monkeypatch)

        # The stand-in counts is-active probes so the initial check sees an
        # active unit while the post-restart verification sees it inactive,
        # which drives the retry branch.
        mock_run.side_effect = self._unit_dies_after_restart()

        # Failing graceful SIGUSR1 -> fallback restart path.
        self._make_drain_fail(monkeypatch)

        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
            cmd_update(mock_args)

        def for_gateway(argvs):
            return [
                argv for argv in argvs
                if "hermes-gateway" in " ".join(str(a) for a in argv)
            ]

        # Two restart attempts (initial + retry), two reset-failed calls.
        gateway_restarts = for_gateway(_systemctl_calls(mock_run, "restart"))
        gateway_resets = for_gateway(_systemctl_calls(mock_run, "reset-failed"))

        assert len(gateway_restarts) >= 2, (
            f"Expected both initial + retry restart calls, got {len(gateway_restarts)}"
        )
        assert len(gateway_resets) >= 2, (
            f"Expected reset-failed before BOTH restart attempts, "
            f"got {len(gateway_resets)} reset-failed call(s)"
        )

    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_final_failure_message_tells_user_to_reset_failed(
        self, mock_run, _mock_which, mock_args, capsys, monkeypatch,
    ):
        """When both fallback restart attempts fail, the final error
        message must include `systemctl reset-failed` in the manual recovery
        hint, not just `systemctl restart` on its own, which is the step
        that just failed twice."""
        self._pretend_linux_systemd(monkeypatch)
        mock_run.side_effect = self._unit_dies_after_restart()
        self._make_drain_fail(monkeypatch)

        with patch.object(gateway_cli, "find_gateway_pids", return_value=[]):
            cmd_update(mock_args)

        captured = capsys.readouterr().out
        assert "failed to stay running" in captured, (
            "Expected the terminal failure message to fire when both "
            f"restart attempts don't survive. Got:\n{captured}"
        )
        assert "reset-failed" in captured, (
            "Final recovery hint must include `reset-failed` so users "
            "know how to escape systemd's parked failed state. Got:\n"
            f"{captured}"
        )
        assert "hermes-gateway" in captured

View file

@ -0,0 +1,137 @@
"""Tests for `hermes update --yes / -y` — assume yes for interactive prompts.
Covers:
1. argparse parses the flag
2. Config-migration prompt is auto-answered (no input() call) and migrate_config
runs with interactive=False so API-key prompts are skipped
3. Autostash restore prompt is auto-answered (prompt_for_restore == False, no
input() call) and the stash is applied automatically
"""
import subprocess
from types import SimpleNamespace
from unittest.mock import patch
from hermes_cli.main import cmd_update
def _make_run_side_effect(
    branch="main", verify_ok=True, commit_count="1", dirty=False
):
    """Build a minimal ``subprocess.run`` side_effect for the update flow.

    Args:
        branch: Branch name reported for ``rev-parse --abbrev-ref``.
        verify_ok: When true ``rev-parse --verify`` exits 0, otherwise 128.
        commit_count: Text reported (plus a newline) for ``rev-list``.
        dirty: When true ``status --porcelain`` reports one modified file;
            otherwise it reports a clean tree.

    Returns:
        A ``side_effect(cmd, **kwargs)`` callable that returns a
        ``subprocess.CompletedProcess``. Commands that match none of the
        patterns succeed with empty output.
    """
    def side_effect(cmd, **kwargs):
        # ``subprocess.run`` accepts a plain string as well as an argv
        # sequence. Joining a string would interleave spaces between its
        # characters and no pattern below could ever match, so use it as-is.
        if isinstance(cmd, str):
            joined = cmd
        else:
            joined = " ".join(str(c) for c in cmd)

        def completed(returncode=0, stdout=""):
            return subprocess.CompletedProcess(
                cmd, returncode, stdout=stdout, stderr=""
            )

        if "rev-parse" in joined and "--abbrev-ref" in joined:
            return completed(stdout=f"{branch}\n")
        if "rev-parse" in joined and "--verify" in joined:
            return completed(returncode=0 if verify_ok else 128)
        if "rev-list" in joined:
            return completed(stdout=f"{commit_count}\n")
        # `git status --porcelain` for dirty-tree detection during autostash.
        if "status" in joined and "--porcelain" in joined:
            return completed(stdout=" M hermes_cli/main.py\n" if dirty else "")
        # Everything else — including `git stash list`, which always reports
        # no stash entries here — succeeds with empty output.
        return completed()

    return side_effect
class TestUpdateYesConfigMigration:
    """--yes auto-answers the config-migration prompt and skips API-key prompts."""

    @patch("hermes_cli.config.migrate_config")
    @patch("hermes_cli.config.check_config_version", return_value=(1, 2))
    @patch("hermes_cli.config.get_missing_config_fields", return_value=[])
    @patch("hermes_cli.config.get_missing_env_vars", return_value=["NEW_KEY"])
    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_yes_auto_migrates_without_input(
        self,
        mock_run,
        _mock_which,
        _mock_missing_env,
        _mock_missing_cfg,
        _mock_version,
        mock_migrate,
        capsys,
    ):
        """With ``yes=True`` the migration runs without calling ``input()``."""
        mock_run.side_effect = _make_run_side_effect(
            branch="main", verify_ok=True, commit_count="1"
        )
        mock_migrate.return_value = {"env_added": [], "config_added": []}

        with patch("builtins.input") as mock_input:
            cmd_update(SimpleNamespace(yes=True))

        # Never prompted the user.
        mock_input.assert_not_called()

        # migrate_config was invoked exactly once, with interactive=False.
        assert mock_migrate.call_count == 1
        assert mock_migrate.call_args.kwargs.get("interactive") is False

        printed = capsys.readouterr().out
        assert "--yes: auto-applying config migration" in printed
        # The "Would you like to configure them now?" prompt text never appears.
        assert "Would you like to configure them now?" not in printed

    @patch("hermes_cli.config.migrate_config")
    @patch("hermes_cli.config.check_config_version", return_value=(1, 2))
    @patch("hermes_cli.config.get_missing_config_fields", return_value=[])
    @patch("hermes_cli.config.get_missing_env_vars", return_value=["NEW_KEY"])
    @patch("shutil.which", return_value=None)
    @patch("subprocess.run")
    def test_no_yes_flag_still_prompts_in_tty(
        self,
        mock_run,
        _mock_which,
        _mock_missing_env,
        _mock_missing_cfg,
        _mock_version,
        mock_migrate,
        capsys,
    ):
        """Regression guard: without --yes, the TTY prompt path still fires."""
        mock_run.side_effect = _make_run_side_effect(
            branch="main", verify_ok=True, commit_count="1"
        )
        mock_migrate.return_value = {"env_added": [], "config_added": []}
        args = SimpleNamespace(yes=False)

        # ``isatty`` is patched on the real ``sys.stdin`` / ``sys.stdout``
        # objects rather than swapping ``hermes_cli.main.sys`` for a
        # MagicMock. The MagicMock approach was flaky under ``pytest-xdist``:
        # a sibling test that imported ``hermes_cli.main`` first could leave
        # a different ``sys`` reference resolved inside the function, the
        # mock would never be consulted, and CI took the
        # "Non-interactive session" branch instead of prompting.
        import sys as _sys

        with patch("builtins.input", return_value="n") as mock_input:
            with patch.object(_sys.stdin, "isatty", return_value=True):
                with patch.object(_sys.stdout, "isatty", return_value=True):
                    cmd_update(args)

        # The user was actually prompted.
        assert mock_input.called
        assert any(
            "configure them now" in (call.args[0] if call.args else "")
            for call in mock_input.call_args_list
        )
class TestUpdateYesStashRestore:
"""--yes auto-restores the pre-update autostash without prompting."""

View file

@ -839,3 +839,148 @@ def test_get_named_custom_provider_transport_resolves_via_display_name(monkeypat
result = rp._get_named_custom_provider("Codex Provider")
assert result is not None
assert result["api_mode"] == "codex_responses"
# =============================================================================
# Regression: user_providers override for private models not listed by /v1/models
# =============================================================================
# Canned validation result used as the ``validate_requested_model`` mock's
# return value: the requested model is rejected with the message "not found".
_REJECTED_VALIDATION = dict(
    accepted=False,
    persist=False,
    recognized=False,
    message="not found",
)
def _run_user_provider_override_case(
    *,
    slug,
    name,
    base_url,
    models,
    raw_input,
):
    """Call ``switch_model`` for a private user provider whose API check fails.

    The bug in PR #17964 was that ``user_providers`` was treated like a list,
    so private models listed in ``models:`` never triggered the override path.
    The validation mock keeps returning a rejection, so a successful switch
    can only come from the provider's configured ``models`` entry, for both
    dict- and list-shaped values.

    Returns whatever ``switch_model`` returns.
    """
    from unittest.mock import patch

    provider_entry = {
        "name": name,
        "api": base_url,
        "discover_models": False,
        "models": models,
    }
    resolved_runtime = {
        "api_key": "***",
        "base_url": base_url,
        "api_mode": "anthropic_messages",
    }

    def keep_model_name(model, provider):
        # Normalisation is a pass-through: the requested name is kept as-is.
        return model

    with patch("hermes_cli.model_switch.resolve_alias", return_value=None), \
         patch("hermes_cli.model_switch.list_provider_models", return_value=[]), \
         patch("hermes_cli.model_switch.normalize_model_for_provider", side_effect=keep_model_name), \
         patch("hermes_cli.models.validate_requested_model", return_value=_REJECTED_VALIDATION), \
         patch("hermes_cli.models.detect_provider_for_model", return_value=None), \
         patch("hermes_cli.model_switch.get_model_info", return_value=None), \
         patch("hermes_cli.model_switch.get_model_capabilities", return_value=None), \
         patch("hermes_cli.runtime_provider.resolve_runtime_provider", return_value=resolved_runtime):
        return switch_model(
            raw_input=raw_input,
            current_provider=slug,
            current_model="old-model",
            current_base_url=base_url,
            user_providers={slug: provider_entry},
            custom_providers=[],
        )
@pytest.mark.parametrize(
    ("slug", "name", "base_url", "models", "raw_input", "expected_model"),
    [
        pytest.param(
            "kimi-coding",
            "Kimi Coding Plan",
            "https://api.kimi.com/coding",
            {"kimi-k2.6": {}},
            "kimi-k2.6",
            "kimi-k2.6",
            id="kimi-coding-plan-dict",
        ),
        pytest.param(
            "kimi-dedicated",
            "Kimi Dedicated",
            "https://api.kimi.com/v1",
            [{"name": "moonshotai/Kimi-K2.6-ACED"}],
            "moonshotai/Kimi-K2.6-ACED",
            "moonshotai/Kimi-K2.6-ACED",
            id="kimi-k2-6-aced-list",
        ),
    ],
)
def test_user_provider_override_accepts_listed_private_models(
    slug,
    name,
    base_url,
    models,
    raw_input,
    expected_model,
):
    """Private models listed in providers: config should override /v1/models misses.

    Two config shapes are exercised:
    - dict models for the Kimi Coding Plan K2p6 case
    - list-of-dicts models for the Kimi-K2.6-ACED dedicated case
    """
    outcome = _run_user_provider_override_case(
        slug=slug,
        name=name,
        base_url=base_url,
        models=models,
        raw_input=raw_input,
    )

    assert outcome.success is True
    assert outcome.new_model == expected_model
    assert outcome.error_message == ""
@pytest.mark.parametrize(
    ("slug", "name", "base_url", "models", "raw_input"),
    [
        pytest.param(
            "kimi-coding",
            "Kimi Coding Plan",
            "https://api.kimi.com/coding",
            {"kimi-k2.6": {}},
            "kimi-k2.6-mangled",
            id="kimi-coding-plan-dict-mangled",
        ),
        pytest.param(
            "kimi-dedicated",
            "Kimi Dedicated",
            "https://api.kimi.com/v1",
            [{"name": "moonshotai/Kimi-K2.6-ACED"}],
            "moonshotai/Kimi-K2.6-ACED!!!",
            id="kimi-k2-6-aced-list-mangled",
        ),
    ],
)
def test_user_provider_override_rejects_mangled_private_models(
    slug,
    name,
    base_url,
    models,
    raw_input,
):
    """Malformed model names should fail cleanly, not crash or auto-accept.

    The requested name is not one of the configured ``models`` entries, so
    the switch must fail and carry the mocked validation message.
    """
    outcome = _run_user_provider_override_case(
        slug=slug,
        name=name,
        base_url=base_url,
        models=models,
        raw_input=raw_input,
    )

    assert outcome.success is False
    assert outcome.error_message == "not found"

View file

@ -31,6 +31,243 @@ class TestPublicAPI:
assert callable(speak_text)
class TestNormalizeVoiceRecordKeyForPromptToolkit:
    """Round-9 Copilot review regression on #19835.

    Classic CLI only normalized ``ctrl+`` / ``alt+``, so TUI-valid
    aliases like ``control+``, ``option+``, ``opt+`` silently bound a
    different (or no) shortcut in the CLI. Normalizer now maps the
    same set of aliases the TUI parser accepts, so one config value
    binds identically in both runtimes.
    """

    def test_ctrl_and_alt_map_to_prompt_toolkit_form(self):
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        assert normalize("ctrl+b") == "c-b"
        assert normalize("alt+r") == "a-r"

    def test_control_option_opt_aliases_match_tui_parser(self):
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        assert normalize("control+o") == "c-o"
        assert normalize("option+space") == "a-space"
        assert normalize("opt+enter") == "a-enter"

    def test_case_insensitive(self):
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        assert normalize("Ctrl+B") == "c-b"
        assert normalize("CONTROL+O") == "c-o"

    def test_non_string_falls_back_to_default(self):
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        for raw in (None, 1, True, {}):
            assert normalize(raw) == "c-b"

    def test_empty_string_falls_back(self):
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        assert normalize("") == "c-b"

    def test_super_win_fall_back_to_default_in_cli(self):
        """prompt_toolkit has no super modifier, so ``super+b`` / ``win+o``
        would crash the classic CLI at startup if passed through. Fall
        back to the documented default; the CLI binding site is
        expected to warn so users know the shortcut is TUI-only
        (Copilot round-11 on #19835)."""
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        for raw in ("super+b", "win+o", "windows+o"):
            assert normalize(raw) == "c-b"

    # Round-10 Copilot review regressions on #19835.

    def test_strips_whitespace_within_and_around(self):
        """``ctrl + b`` / `` option + space `` are accepted by the TUI
        parser; the CLI normalizer must mirror that or the same config
        binds different shortcuts across runtimes."""
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        assert normalize("ctrl + b") == "c-b"
        assert normalize(" option + space ") == "a-space"

    def test_named_key_aliases_collapse_to_prompt_toolkit_canonical(self):
        """TUI accepts ``return`` / ``esc`` / ``bs`` / ``del`` etc.;
        CLI must collapse to prompt_toolkit's canonical spelling
        (``enter`` / ``escape`` / ``backspace`` / ``delete``)."""
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        assert normalize("ctrl+return") == "c-enter"
        assert normalize("ctrl+esc") == "c-escape"
        assert normalize("ctrl+bs") == "c-backspace"
        assert normalize("alt+del") == "a-delete"

    def test_typoed_named_keys_fall_back_to_default(self):
        """``ctrl+spcae`` would otherwise pass through as ``c-spcae`` and
        prompt_toolkit would reject it at startup — fall back instead."""
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        assert normalize("ctrl+spcae") == "c-b"
        assert normalize("ctrl+f5") == "c-b"

    def test_bare_char_and_multi_modifier_fall_back(self):
        """TUI parser rejects bare-char (``o``) and multi-modifier
        (``ctrl+alt+r``) configs; the CLI normalizer must match."""
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        for raw in ("o", "b", "ctrl+alt+r"):
            assert normalize(raw) == "c-b"

    def test_reserved_ctrl_chars_fall_back(self):
        """``ctrl+c`` / ``ctrl+d`` / ``ctrl+l`` are always claimed by
        the CLI's prompt_toolkit input layer or terminal driver; match
        the TUI parser's rejection to keep /voice status honest."""
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        for raw in ("ctrl+c", "ctrl+d", "ctrl+l"):
            assert normalize(raw) == "c-b"

    def test_unknown_modifier_falls_back(self):
        """``meta+b`` is ambiguous on the wire (Alt on xterm, Cmd on
        legacy macOS), same class as the TUI parser's rejection."""
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        assert normalize("meta+b") == "c-b"
        assert normalize("shift+b") == "c-b"

    # Round-14 Copilot review regression on #19835. On macOS the TUI
    # parser rejects alt+c/d/l because hermes-ink reports Alt as
    # ``key.meta`` and isActionMod(darwin) accepts it. The CLI
    # normalizer must mirror that platform-gated rejection so shared
    # configs like ``option+c`` don't bind Alt+C in the CLI while the
    # TUI falls back to Ctrl+B.

    def test_alt_cdl_rejected_on_macos(self, monkeypatch):
        monkeypatch.setattr("sys.platform", "darwin")
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        for raw in ("alt+c", "alt+d", "alt+l", "option+c", "opt+d"):
            assert normalize(raw) == "c-b"
        # Other alt letters still bind on darwin.
        assert normalize("alt+r") == "a-r"
        assert normalize("alt+space") == "a-space"

    def test_alt_cdl_allowed_on_non_macos(self, monkeypatch):
        monkeypatch.setattr("sys.platform", "linux")
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit as normalize,
        )

        assert normalize("alt+c") == "a-c"
        assert normalize("alt+d") == "a-d"
        assert normalize("alt+l") == "a-l"
class TestVoiceRecordKeyFromConfig:
    """Round-11 Copilot review regression on #19835.

    ``load_config()`` preserves YAML scalar overrides, so a hand-edited
    ``voice: true`` or ``voice: cmd+b`` made the naive
    ``cfg.get('voice', {}).get('record_key')`` chain raise
    AttributeError before voice could run. The shape-safe extractor
    returns None for every malformed shape so the call-site fallback
    (``normalize_`` / ``format_``) surfaces the documented default.
    """

    def test_dict_voice_with_string_record_key(self):
        from hermes_cli.voice import voice_record_key_from_config

        config = {"voice": {"record_key": "ctrl+o"}}
        assert voice_record_key_from_config(config) == "ctrl+o"

    def test_non_dict_config_root(self):
        from hermes_cli.voice import voice_record_key_from_config

        bad_roots = (None, True, 1, "ctrl+b", [], ["ctrl+b"])
        for bad_root in bad_roots:
            assert voice_record_key_from_config(bad_root) is None, bad_root

    def test_non_dict_voice_entry(self):
        from hermes_cli.voice import voice_record_key_from_config

        bad_voices = (None, True, "cmd+b", 42, ["ctrl+b"])
        for bad_voice in bad_voices:
            assert voice_record_key_from_config({"voice": bad_voice}) is None, bad_voice

    def test_missing_record_key_returns_none(self):
        from hermes_cli.voice import voice_record_key_from_config

        assert voice_record_key_from_config({"voice": {"beep_enabled": True}}) is None
        assert voice_record_key_from_config({}) is None

    def test_normalizer_accepts_extractor_output_directly(self):
        """voice_record_key_from_config + normalize_… must compose —
        None / non-string scalars all fall back to c-b."""
        from hermes_cli.voice import (
            normalize_voice_record_key_for_prompt_toolkit,
            voice_record_key_from_config,
        )

        for raw in (None, True, 1, "cmd+b", ["ctrl+b"]):
            extracted = voice_record_key_from_config({"voice": raw})
            normalized = normalize_voice_record_key_for_prompt_toolkit(extracted)
            assert normalized == "c-b"
class TestFormatVoiceRecordKeyForStatus:
    """Round-10 Copilot review regression on #19835.

    ``/voice status`` used to print the raw scalar (``True`` / ``1``)
    for non-string configs even though the actual binding falls back
    to Ctrl+B. The formatter routes through the same normalizer so
    status always matches what the CLI actually binds.
    """

    def test_ctrl_and_alt_letter_keys_render_canonically(self):
        from hermes_cli.voice import format_voice_record_key_for_status as fmt

        assert fmt("ctrl+b") == "Ctrl+B"
        assert fmt("ctrl+o") == "Ctrl+O"
        assert fmt("alt+r") == "Alt+R"

    def test_named_keys_render_in_title_case(self):
        from hermes_cli.voice import format_voice_record_key_for_status as fmt

        assert fmt("ctrl+space") == "Ctrl+Space"
        assert fmt("alt+enter") == "Alt+Enter"
        assert fmt("ctrl+esc") == "Ctrl+Escape"

    def test_aliases_render_via_normalized_form(self):
        from hermes_cli.voice import format_voice_record_key_for_status as fmt

        assert fmt("control+o") == "Ctrl+O"
        assert fmt("option+space") == "Alt+Space"
        assert fmt("opt+enter") == "Alt+Enter"

    def test_non_string_scalar_falls_back_to_ctrl_b_label(self):
        from hermes_cli.voice import format_voice_record_key_for_status as fmt

        # Copilot round-10 regression: previously /voice status printed
        # the raw scalar ("True" / "1") even though the actual binding
        # fell back to Ctrl+B.
        for raw in (True, 1, None, {}):
            assert fmt(raw) == "Ctrl+B"

    def test_malformed_configs_fall_back_to_ctrl_b(self):
        from hermes_cli.voice import format_voice_record_key_for_status as fmt

        for raw in ("ctrl+spcae", "ctrl+alt+r", "", " "):
            assert fmt(raw) == "Ctrl+B"
class TestStopWithoutStart:
def test_returns_none_when_no_recording_active(self, monkeypatch):
"""Idempotent no-op: stop before start must not raise or touch state."""
@ -72,6 +309,7 @@ class TestContinuousAPI:
# Isolate from any state left behind by other tests in the session.
monkeypatch.setattr(voice, "_continuous_active", False)
monkeypatch.setattr(voice, "_continuous_stopping", False, raising=False)
monkeypatch.setattr(voice, "_continuous_recorder", None)
assert voice.is_continuous_active() is False
@ -106,11 +344,20 @@ class TestContinuousAPI:
monkeypatch.setattr(voice, "_continuous_recorder", FakeRecorder())
voice.start_continuous(on_transcript=lambda _t: None)
started = voice.start_continuous(on_transcript=lambda _t: None)
# The guard inside start_continuous short-circuits before rec.start()
assert started is True
assert called["n"] == 0
def test_start_returns_false_while_stopping(self, monkeypatch):
    """``start_continuous`` returns False while ``_continuous_stopping`` is set."""
    import hermes_cli.voice as voice

    # Not active, but flagged as stopping.
    monkeypatch.setattr(voice, "_continuous_active", False)
    monkeypatch.setattr(voice, "_continuous_stopping", True, raising=False)

    started = voice.start_continuous(on_transcript=lambda _t: None)

    assert started is False
class TestContinuousLoopSimulation:
"""End-to-end simulation of the VAD loop with a fake recorder.
@ -131,6 +378,8 @@ class TestContinuousLoopSimulation:
monkeypatch.setattr(voice, "_continuous_on_transcript", None)
monkeypatch.setattr(voice, "_continuous_on_status", None)
monkeypatch.setattr(voice, "_continuous_on_silent_limit", None)
monkeypatch.setattr(voice, "_continuous_auto_restart", True, raising=False)
monkeypatch.setattr(voice, "_play_beep", lambda *_, **__: None)
class FakeRecorder:
_silence_threshold = 200
@ -144,13 +393,20 @@ class TestContinuousLoopSimulation:
self.cancelled = 0
# Preset WAV path returned by stop()
self.next_stop_wav = "/tmp/fake.wav"
self.fail_stop = False
self.fail_next_start = False
def start(self, on_silence_stop=None):
if self.fail_next_start:
self.fail_next_start = False
raise RuntimeError("boom")
self.start_calls += 1
self.last_callback = on_silence_stop
self.is_recording = True
def stop(self):
if self.fail_stop:
raise RuntimeError("stop failed")
self.stopped += 1
self.is_recording = False
return self.next_stop_wav
@ -196,6 +452,204 @@ class TestContinuousLoopSimulation:
voice.stop_continuous()
def test_auto_restart_false_stops_after_first_transcript(self, fake_recorder, monkeypatch):
    """With ``auto_restart=False`` one transcript is delivered and the loop goes idle."""
    import hermes_cli.voice as voice

    def fake_transcribe(_p):
        return {"success": True, "transcript": "single shot"}

    monkeypatch.setattr(voice, "transcribe_recording", fake_transcribe)
    monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)

    transcripts, statuses = [], []
    voice.start_continuous(
        on_transcript=lambda t: transcripts.append(t),
        on_status=lambda s: statuses.append(s),
        auto_restart=False,
    )

    # Fire the silence callback captured by the fake recorder.
    fake_recorder.last_callback()

    assert transcripts == ["single shot"]
    # The recorder was started only once, i.e. not restarted.
    assert fake_recorder.start_calls == 1
    assert statuses == ["listening", "transcribing", "idle"]
    assert voice.is_continuous_active() is False
def test_auto_restart_false_retains_silent_strikes_across_starts(
    self, fake_recorder, monkeypatch
):
    """Three separate single-shot starts with empty transcripts fire the silent limit once."""
    import hermes_cli.voice as voice

    def fake_transcribe(_p):
        return {"success": True, "transcript": ""}

    monkeypatch.setattr(voice, "transcribe_recording", fake_transcribe)
    monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)

    silent_limit_fired = []
    attempts = 3
    for _attempt in range(attempts):
        voice.start_continuous(
            on_transcript=lambda _t: None,
            on_silent_limit=lambda: silent_limit_fired.append(True),
            auto_restart=False,
        )
        # Each start is ended by the recorder's silence callback.
        fake_recorder.last_callback()

    assert silent_limit_fired == [True]
    assert voice.is_continuous_active() is False
    assert fake_recorder.start_calls == 3
def test_force_transcribe_stop_delivers_current_buffer(self, fake_recorder, monkeypatch):
    """``stop_continuous(force_transcribe=True)`` transcribes what was recorded so far."""
    import hermes_cli.voice as voice

    class InlineThread:
        # Thread stand-in whose start() runs the target synchronously.
        def __init__(self, target, daemon=False):
            self.target = target

        def start(self):
            self.target()

    def fake_transcribe(_p):
        return {"success": True, "transcript": "manual stop"}

    monkeypatch.setattr(voice.threading, "Thread", InlineThread)
    monkeypatch.setattr(voice, "transcribe_recording", fake_transcribe)
    monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)

    transcripts, statuses = [], []
    voice.start_continuous(
        on_transcript=lambda t: transcripts.append(t),
        on_status=lambda s: statuses.append(s),
    )
    voice.stop_continuous(force_transcribe=True)

    assert fake_recorder.stopped == 1
    assert transcripts == ["manual stop"]
    assert statuses == ["listening", "transcribing", "idle"]
    assert voice.is_continuous_active() is False
def test_force_transcribe_empty_single_shots_hit_silent_limit(
    self, fake_recorder, monkeypatch
):
    """Three force-stopped single shots with empty transcripts fire the silent limit once."""
    import hermes_cli.voice as voice

    class InlineThread:
        # Thread stand-in whose start() runs the target synchronously.
        def __init__(self, target, daemon=False):
            self.target = target

        def start(self):
            self.target()

    def fake_transcribe(_p):
        return {"success": True, "transcript": ""}

    monkeypatch.setattr(voice.threading, "Thread", InlineThread)
    monkeypatch.setattr(voice, "transcribe_recording", fake_transcribe)
    monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)

    silent_limit_fired = []
    attempts = 3
    for _attempt in range(attempts):
        voice.start_continuous(
            on_transcript=lambda _t: None,
            on_silent_limit=lambda: silent_limit_fired.append(True),
            auto_restart=False,
        )
        voice.stop_continuous(force_transcribe=True)

    assert silent_limit_fired == [True]
    assert fake_recorder.stopped == 3
    # The strike counter is back at zero once the limit has fired.
    assert voice._continuous_no_speech_count == 0
def test_force_transcribe_valid_single_shot_resets_silent_strikes(
    self, fake_recorder, monkeypatch
):
    """A non-empty force-stopped transcript clears previously accumulated silent strikes."""
    import hermes_cli.voice as voice

    class InlineThread:
        # Thread stand-in whose start() runs the target synchronously.
        def __init__(self, target, daemon=False):
            self.target = target

        def start(self):
            self.target()

    def fake_transcribe(_p):
        return {"success": True, "transcript": "manual stop"}

    monkeypatch.setattr(voice.threading, "Thread", InlineThread)
    # Begin with two strikes already counted.
    monkeypatch.setattr(voice, "_continuous_no_speech_count", 2)
    monkeypatch.setattr(voice, "transcribe_recording", fake_transcribe)
    monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)

    transcripts, silent_limit_fired = [], []
    voice.start_continuous(
        on_transcript=lambda t: transcripts.append(t),
        on_silent_limit=lambda: silent_limit_fired.append(True),
        auto_restart=False,
    )
    voice.stop_continuous(force_transcribe=True)

    assert transcripts == ["manual stop"]
    assert silent_limit_fired == []
    assert voice._continuous_no_speech_count == 0
def test_force_transcribe_stop_failure_cancels_and_clears_stopping(
    self, fake_recorder, monkeypatch
):
    """If the recorder's stop() raises, the recording is cancelled and the stopping flag cleared."""
    import hermes_cli.voice as voice

    class InlineThread:
        # Thread stand-in whose start() runs the target synchronously.
        def __init__(self, target, daemon=False):
            self.target = target

        def start(self):
            self.target()

    monkeypatch.setattr(voice.threading, "Thread", InlineThread)
    # Make the fake recorder's stop() raise.
    fake_recorder.fail_stop = True

    statuses = []
    voice.start_continuous(
        on_transcript=lambda _t: None,
        on_status=lambda s: statuses.append(s),
    )
    voice.stop_continuous(force_transcribe=True)

    assert fake_recorder.cancelled == 1
    assert statuses == ["listening", "transcribing", "idle"]
    assert voice.is_continuous_active() is False
    assert voice._continuous_stopping is False
def test_restart_failure_reports_idle(self, fake_recorder, monkeypatch):
    """When the recorder fails to start again after a transcript, the status ends at "idle"."""
    import hermes_cli.voice as voice

    def fake_transcribe(_p):
        return {"success": True, "transcript": "hello world"}

    monkeypatch.setattr(voice, "transcribe_recording", fake_transcribe)
    monkeypatch.setattr(voice, "is_whisper_hallucination", lambda _t: False)

    statuses = []
    voice.start_continuous(
        on_transcript=lambda _t: None,
        on_status=lambda s: statuses.append(s),
    )

    # The next recorder.start() call raises.
    fake_recorder.fail_next_start = True
    fake_recorder.last_callback()

    assert statuses == ["listening", "transcribing", "idle"]
    assert voice.is_continuous_active() is False
def test_silent_limit_halts_loop_after_three_strikes(self, fake_recorder, monkeypatch):
import hermes_cli.voice as voice

View file

@ -1826,6 +1826,117 @@ class TestNormaliseThemeExtensions:
assert r["componentStyles"]["card"] == {"opacity": "0.8", "zIndex": "5"}
class TestPluginAPIAuth:
"""Tests that plugin API routes require the session token (issue #19533)."""
@pytest.fixture(autouse=True)
def _setup_test_client(self, monkeypatch, _isolate_hermes_home):
"""Create a TestClient without the session token header."""
try:
from starlette.testclient import TestClient
except ImportError:
pytest.skip("fastapi/starlette not installed")
import hermes_state
from hermes_constants import get_hermes_home
from hermes_cli.web_server import app, _SESSION_HEADER_NAME, _SESSION_TOKEN
monkeypatch.setattr(hermes_state, "DEFAULT_DB_PATH", get_hermes_home() / "state.db")
self.client = TestClient(app)
self.auth_client = TestClient(app)
self.auth_client.headers[_SESSION_HEADER_NAME] = _SESSION_TOKEN
def test_plugin_route_requires_auth(self):
"""Plugin API routes should return 401 without a valid session token."""
# Use a known plugin route (kanban board)
resp = self.client.get("/api/plugins/kanban/board")
assert resp.status_code == 401
def test_plugin_route_allows_auth(self):
"""Plugin API routes should work with a valid session token.
Use ``/api/plugins/example/hello`` from the example-dashboard plugin
a stable, side-effect-free GET that's always loaded in tests. With a
valid token the handler should run (200); without one the middleware
should 401 before the handler is reached.
"""
# Without auth: middleware blocks before reaching the handler.
resp = self.client.get("/api/plugins/example/hello")
assert resp.status_code == 401
# With auth: handler runs.
resp = self.auth_client.get("/api/plugins/example/hello")
assert resp.status_code == 200
def test_plugin_post_requires_auth(self):
"""Plugin POST routes should return 401 without a valid session token."""
resp = self.client.post("/api/plugins/kanban/tasks", json={"title": "test"})
assert resp.status_code == 401
def test_plugin_patch_requires_auth(self):
"""Plugin PATCH routes should return 401 without a valid session token.
PATCH is the mutation method most commonly used by the dashboard for
kanban task edits explicitly cover it so a future middleware
regression that whitelists non-GET methods can't sneak through.
"""
resp = self.client.patch(
"/api/plugins/kanban/tasks/t_fake",
json={"title": "renamed"},
)
assert resp.status_code == 401
def test_plugin_delete_requires_auth(self):
"""Plugin DELETE routes should return 401 without a valid session token."""
resp = self.client.delete("/api/plugins/kanban/tasks/t_fake")
assert resp.status_code == 401
def test_non_kanban_plugin_route_requires_auth(self):
"""Auth must be plugin-agnostic, not kanban-specific.
The middleware fix is at the gate level (no per-plugin allowlist),
so any plugin's API surface — kanban, hermes-achievements, future
plugins must require the session token. Hit a non-kanban plugin
path to lock that in.
"""
# Real plugin path (hermes-achievements is loaded by default).
resp = self.client.get("/api/plugins/hermes-achievements/overview")
assert resp.status_code == 401
# Same for an arbitrary plugin namespace that doesn't even exist —
# the middleware should 401 before routing decides 404, so an
# attacker can't fingerprint plugin names by status codes.
resp = self.client.get("/api/plugins/_definitely_not_a_plugin_/anything")
assert resp.status_code == 401
def test_plugin_websocket_unaffected_by_http_middleware(self):
    """WebSocket upgrades must stay outside the HTTP auth middleware.

    The kanban ``/events`` WebSocket performs its own ``?token=`` check, and
    the HTTP middleware change must not start gating WS upgrades. Starlette
    doesn't run HTTP middleware on WebSocket upgrades anyway, but this pins
    the behavior so a later refactor that moves auth into a shared layer
    cannot silently break the WS auth contract.
    """
    from starlette.websockets import WebSocketDisconnect
    # NOTE(review): _SESSION_TOKEN is imported but never used in this test.
    from hermes_cli.web_server import _SESSION_TOKEN

    events_path = "/api/plugins/kanban/events"

    # No token is supplied: the WS endpoint is expected to close the upgrade
    # through its own _check_ws_token rather than the HTTP middleware
    # answering with a 401.
    # NOTE(review): every handler below is a no-op, so nothing here can fail
    # on an ``Exception`` subclass -- the test only proves the connect attempt
    # does not escape with something more severe.
    try:
        with self.client.websocket_connect(events_path):
            # Reaching this point without a disconnect means the WS accepted us.
            pass
    except WebSocketDisconnect:
        # Expected path: the WS endpoint rejected us via its own check.
        pass
    except Exception:
        # The kanban plugin may not be mounted in this test environment, in
        # which case the route does not exist at all (3xx/4xx during the
        # upgrade). That is acceptable for this regression; it only matters
        # that the HTTP middleware did not start intercepting WS upgrades.
        pass
class TestDashboardPluginManifestExtensions:
"""Tests for the extended plugin manifest fields (tab.override,
tab.hidden, slots) read by _discover_dashboard_plugins()."""

View file

@ -13,7 +13,7 @@ from unittest.mock import patch
import pytest
from hermes_cli.main import _web_ui_build_needed, _build_web_ui
from hermes_cli.main import _web_ui_build_needed, _build_web_ui, _run_npm_install_deterministic
def _touch(path: Path, offset: float = 0.0) -> None:
@ -119,3 +119,92 @@ class TestBuildWebUISkipsWhenFresh:
assert result is True
assert mock_run.call_count == 2 # npm install + npm run build
def test_npm_install_uses_utf8_replace_output_decoding(self, tmp_path):
    """The npm install subprocess is run with UTF-8 / ``errors="replace"`` text decoding."""
    import subprocess

    web_dir, _ = _make_web_dir(tmp_path)
    # A lockfile is present for this run -- presumably it selects the
    # lockfile-based install path; confirm against the helper under test.
    lockfile = web_dir / "package-lock.json"
    lockfile.write_text("{}", encoding="utf-8")
    completed = subprocess.CompletedProcess([], 0, stdout="", stderr="")

    with patch("hermes_cli.main.subprocess.run", return_value=completed) as mock_run:
        result = _run_npm_install_deterministic("/usr/bin/npm", web_dir)

    assert result.returncode == 0
    # Inspect the keyword arguments of the most recent subprocess.run call.
    _positional, run_kwargs = mock_run.call_args
    assert run_kwargs["text"] is True
    assert run_kwargs["encoding"] == "utf-8"
    assert run_kwargs["errors"] == "replace"
def test_web_build_uses_utf8_replace_output_decoding(self, tmp_path):
    """The second subprocess call of the web build uses UTF-8 / ``errors="replace"`` decoding."""
    import subprocess

    web_dir, _ = _make_web_dir(tmp_path)
    completed = subprocess.CompletedProcess([], 0, stdout="", stderr="")
    # Two successful results are queued, one per expected subprocess.run call.
    queued = [completed, completed]

    with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
            patch("hermes_cli.main.subprocess.run", side_effect=queued) as mock_run:
        result = _build_web_ui(web_dir)

    assert result is True
    # Index 1 is the second recorded call; its kwargs carry the decoding options.
    _positional, build_kwargs = mock_run.call_args_list[1]
    assert build_kwargs["text"] is True
    assert build_kwargs["encoding"] == "utf-8"
    assert build_kwargs["errors"] == "replace"
class TestBuildWebUIRetryAndStaleFallback:
    """Coverage for the retry + stale-dist fallback added in #23824 / issue #23817."""

    @staticmethod
    def _completed(returncode, stderr=""):
        """Return a ``CompletedProcess`` for the patched ``subprocess.run`` to hand back."""
        import subprocess

        return subprocess.CompletedProcess([], returncode, stdout="", stderr=stderr)

    def test_retries_build_once_on_failure(self, tmp_path):
        """A failed first build is retried once, after a 3-second sleep."""
        web_dir, _ = _make_web_dir(tmp_path)
        # install: success; build attempt 1: fail; build attempt 2: success
        outcomes = [
            self._completed(0),
            self._completed(1, stderr="EPERM"),
            self._completed(0),
        ]

        with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
                patch("hermes_cli.main._time.sleep") as mock_sleep, \
                patch("hermes_cli.main.subprocess.run", side_effect=outcomes) as mock_run:
            result = _build_web_ui(web_dir)

        assert result is True
        assert mock_run.call_count == 3  # install + build + retry
        mock_sleep.assert_called_once_with(3)

    def test_falls_back_to_stale_dist_when_retry_also_fails(self, tmp_path, capsys):
        """With an existing dist, two failed builds still yield True and a warning."""
        web_dir, dist_dir = _make_web_dir(tmp_path)
        # Stale dist exists but is older than source
        _touch(dist_dir / "index.html", offset=-100)
        _touch(web_dir / "src" / "App.tsx")  # newer source -> build_needed=True

        install_ok = self._completed(0)
        build_fail = self._completed(1, stderr="vite ENOMEM")
        outcomes = [install_ok, build_fail, build_fail]

        with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
                patch("hermes_cli.main._time.sleep"), \
                patch("hermes_cli.main.subprocess.run", side_effect=outcomes):
            result = _build_web_ui(web_dir, fatal=True)

        # MUST return True (serve stale) — issue #23817 — even with fatal=True,
        # because cmd_dashboard passes fatal=True and is the primary caller.
        assert result is True
        printed = capsys.readouterr().out
        assert "serving stale dist as fallback" in printed
        assert "vite ENOMEM" in printed  # stderr surfaced to user

    def test_hard_fails_when_no_dist_to_fall_back_to(self, tmp_path, capsys):
        """Without any dist to serve, two failed builds yield False plus guidance."""
        web_dir, _ = _make_web_dir(tmp_path)

        install_ok = self._completed(0)
        build_fail = self._completed(1, stderr="vite ENOMEM")
        outcomes = [install_ok, build_fail, build_fail]

        with patch("hermes_cli.main.shutil.which", return_value="/usr/bin/npm"), \
                patch("hermes_cli.main._time.sleep"), \
                patch("hermes_cli.main.subprocess.run", side_effect=outcomes):
            result = _build_web_ui(web_dir, fatal=True)

        assert result is False
        printed = capsys.readouterr().out
        assert "Web UI build failed" in printed
        assert "vite ENOMEM" in printed
        assert "Run manually" in printed