mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
Policy: if it ain't a secret it goes in config.yaml. HERMES_INFERENCE_PROVIDER was leaking behavioral config into the .env surface, including from the gateway, which bypassed config.yaml entirely. Behavior: - gateway/run.py: drop HERMES_INFERENCE_PROVIDER read in _resolve_runtime_agent_kwargs. Gateway now flows through resolve_runtime_provider() with no `requested` override, which reads model.provider from config.yaml first. Docs/UX (strip env var from user-facing surface): - --provider help text no longer mentions the env var - cli-config.yaml.example same - reference/environment-variables.md: remove HERMES_INFERENCE_PROVIDER row and the cross-reference from HERMES_INFERENCE_MODEL - reference/cli-commands.md: blank the env-var column for --provider - guides/xai-grok-oauth.md, guides/minimax-oauth.md: replace HERMES_INFERENCE_PROVIDER=x hermes invocations with config.yaml / --provider - developer-guide/adding-providers.md, model-provider-plugin.md: reframe Internal mechanism (kept as-is): - hermes_cli/main.py writes HERMES_INFERENCE_PROVIDER into the TUI subprocess env - tui_gateway/server.py reads it on TUI startup - resolve_requested_provider() / oneshot.py / cli.py still fall through to the env var as a last-resort behind config.yaml, which is what makes the TUI parent->child handoff work This stays. We just stop documenting it as a user knob. Tests: tests/gateway/test_auth_fallback.py — simplify mock to fail on first call, succeed on second; drop monkeypatch.setenv lines that no longer matter. Supersedes #31064 (closed with credit to @novax635 who surfaced the underlying issue but proposed aligning gateway *to* the env var rather than removing it).
116 lines
4.3 KiB
Python
116 lines
4.3 KiB
Python
"""Test that AuthError triggers fallback provider resolution (#7230)."""
|
|
|
|
import os
|
|
from unittest.mock import patch, MagicMock
|
|
|
|
import pytest
|
|
|
|
|
|
class TestResolveRuntimeAgentKwargsAuthFallback:
    """_resolve_runtime_agent_kwargs should try fallback on AuthError.

    Each test writes a throwaway ``config.yaml`` into ``tmp_path``, points
    ``gateway.run._hermes_home`` at that directory, and replaces
    ``hermes_cli.runtime_provider.resolve_runtime_provider`` with a mock so
    the outcome of every resolution attempt is scripted by the test itself.
    """

    @staticmethod
    def _install_config(tmp_path, monkeypatch, yaml_text):
        """Write *yaml_text* to ``tmp_path/config.yaml`` and make it the gateway home."""
        (tmp_path / "config.yaml").write_text(yaml_text)
        monkeypatch.setattr("gateway.run._hermes_home", tmp_path)

    def test_auth_error_tries_fallback(self, tmp_path, monkeypatch):
        """When primary provider raises AuthError, fallback is attempted."""
        from hermes_cli.auth import AuthError

        # Config with a primary provider plus a legacy ``fallback_model`` entry.
        self._install_config(
            tmp_path,
            monkeypatch,
            "model:\n provider: openai-codex\n"
            "fallback_model:\n provider: openrouter\n"
            " model: meta-llama/llama-4-maverick\n",
        )

        calls = []

        def _mock_resolve(**kwargs):
            # First call = primary path; we simulate the auth failure here.
            # Any later call = fallback path, which is expected to be driven
            # by the fallback_model entry written above and succeeds.
            calls.append(kwargs)
            if len(calls) == 1:
                raise AuthError("Codex token refresh failed with status 401")
            return {
                "api_key": "fallback-key",
                "base_url": "https://openrouter.ai/api/v1",
                "provider": "openrouter",
                "api_mode": "openai_chat",
                "command": None,
                "args": None,
                "credential_pool": None,
            }

        with patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            side_effect=_mock_resolve,
        ):
            # Imported while the patch is active, as in the other tests.
            from gateway.run import _resolve_runtime_agent_kwargs

            result = _resolve_runtime_agent_kwargs()

        assert result["provider"] == "openrouter"
        assert result["api_key"] == "fallback-key"
        # Should have been called at least twice (primary + fallback)
        assert len(calls) >= 2

    def test_auth_error_no_fallback_raises(self, tmp_path, monkeypatch):
        """When primary fails and no fallback configured, RuntimeError is raised."""
        from hermes_cli.auth import AuthError

        # Only a primary provider: there is nothing to fall back to.
        self._install_config(
            tmp_path,
            monkeypatch,
            "model:\n provider: openai-codex\n",
        )

        with patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            side_effect=AuthError("token expired"),
        ):
            from gateway.run import _resolve_runtime_agent_kwargs

            with pytest.raises(RuntimeError):
                _resolve_runtime_agent_kwargs()

    def test_legacy_fallback_is_appended_after_fallback_providers(self, tmp_path, monkeypatch):
        """When both keys exist, the legacy entry still participates in resolution."""
        # The ``model`` key of the list item must be indented to the same
        # column as ``provider`` (i.e. past the dash); otherwise it is not part
        # of the list entry and the document is not valid block-sequence YAML.
        self._install_config(
            tmp_path,
            monkeypatch,
            "fallback_providers:\n"
            "  - provider: openrouter\n"
            "    model: anthropic/claude-sonnet-4.6\n"
            "fallback_model:\n"
            "  provider: nous\n"
            "  model: Hermes-4\n",
        )

        calls = []

        def _mock_resolve(**kwargs):
            # Record which provider was requested; only openrouter fails.
            requested = kwargs.get("requested")
            calls.append(requested)
            if requested == "openrouter":
                raise RuntimeError("openrouter unavailable")
            return {
                "api_key": "nous-key",
                "base_url": "https://portal.nousresearch.com/v1",
                "provider": "nous",
                "api_mode": "chat_completions",
                "command": None,
                "args": None,
                "credential_pool": None,
            }

        with patch(
            "hermes_cli.runtime_provider.resolve_runtime_provider",
            side_effect=_mock_resolve,
        ):
            from gateway.run import _try_resolve_fallback_provider

            result = _try_resolve_fallback_provider()

        # fallback_providers entries are tried first, then the legacy entry.
        assert calls == ["openrouter", "nous"]
        assert result["provider"] == "nous"
        assert result["model"] == "Hermes-4"
|