# Source: hermes-agent/tests/agent/test_auxiliary_client.py (viewer metadata: 1490 lines, 64 KiB, Python)

"""Tests for agent.auxiliary_client resolution chain, provider overrides, and model overrides."""
import json
import logging
import os
from pathlib import Path
from unittest.mock import patch, MagicMock, AsyncMock
import pytest
from agent.auxiliary_client import (
get_text_auxiliary_client,
get_available_vision_backends,
resolve_vision_provider_client,
resolve_provider_client,
auxiliary_max_tokens_param,
call_llm,
async_call_llm,
_read_codex_access_token,
_get_provider_chain,
_is_payment_error,
_normalize_aux_provider,
_try_payment_fallback,
_resolve_auto,
)
@pytest.fixture(autouse=True)
def _clean_env(monkeypatch):
    """Remove provider-related environment variables before every test.

    Declared ``autouse`` so it applies to each test in this module without
    being requested explicitly.
    """
    provider_env_vars = (
        "OPENROUTER_API_KEY",
        "OPENAI_BASE_URL",
        "OPENAI_API_KEY",
        "OPENAI_MODEL",
        "LLM_MODEL",
        "NOUS_INFERENCE_BASE_URL",
        "ANTHROPIC_API_KEY",
        "ANTHROPIC_TOKEN",
        "CLAUDE_CODE_OAUTH_TOKEN",
    )
    for name in provider_env_vars:
        # raising=False: the variable may already be absent.
        monkeypatch.delenv(name, raising=False)
@pytest.fixture
def codex_auth_dir(tmp_path, monkeypatch):
    """Create ``<tmp_path>/.codex/auth.json`` and stub the Codex token reader.

    The file receives a small token payload, and
    ``agent.auxiliary_client._read_codex_access_token`` is replaced by a stub
    that returns the same access token.

    Returns:
        The path of the created ``.codex`` directory.
    """
    access_token = "codex-test-token-abc123"
    auth_payload = {
        "tokens": {
            "access_token": access_token,
            "refresh_token": "codex-refresh-xyz",
        }
    }

    directory = tmp_path / ".codex"
    directory.mkdir()
    (directory / "auth.json").write_text(json.dumps(auth_payload))

    monkeypatch.setattr(
        "agent.auxiliary_client._read_codex_access_token",
        lambda: access_token,
    )
    return directory
class TestNormalizeAuxProvider:
    """Alias handling of ``_normalize_aux_provider``."""

    def test_maps_github_copilot_aliases(self):
        """Each GitHub-flavoured alias normalizes to ``copilot``."""
        for alias in ("github", "github-copilot", "github-models"):
            assert _normalize_aux_provider(alias) == "copilot"

    def test_maps_github_copilot_acp_aliases(self):
        """Each ACP alias normalizes to ``copilot-acp``."""
        for alias in ("github-copilot-acp", "copilot-acp-agent"):
            assert _normalize_aux_provider(alias) == "copilot-acp"
class TestReadCodexAccessToken:
    """Checks ``_read_codex_access_token`` against temporary auth stores.

    Most tests point ``HERMES_HOME`` at a scratch directory and write an
    ``auth.json`` there before calling the function under test.
    """

    @staticmethod
    def _use_hermes_home(tmp_path, monkeypatch, store=None):
        """Create ``tmp_path/hermes`` and export it as ``HERMES_HOME``.

        When *store* is given it is serialized as JSON into ``auth.json``
        inside that directory.
        """
        home = tmp_path / "hermes"
        home.mkdir(parents=True, exist_ok=True)
        if store is not None:
            (home / "auth.json").write_text(json.dumps(store))
        monkeypatch.setenv("HERMES_HOME", str(home))
        return home

    @staticmethod
    def _codex_store(access_token, refresh_token="r"):
        """Return an auth-store dict with a single ``openai-codex`` entry."""
        return {
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {
                        "access_token": access_token,
                        "refresh_token": refresh_token,
                    },
                },
            },
        }

    @staticmethod
    def _fake_jwt(claims):
        """Build an unsigned three-part JWT-shaped string carrying *claims*."""
        import base64

        def _segment(raw):
            # URL-safe base64 with the '=' padding stripped.
            return base64.urlsafe_b64encode(raw).rstrip(b"=").decode()

        header = _segment(b'{"alg":"RS256","typ":"JWT"}')
        payload = _segment(json.dumps(claims).encode())
        return f"{header}.{payload}.fakesig"

    def test_valid_auth_store(self, tmp_path, monkeypatch):
        store = self._codex_store("tok-123", refresh_token="r-456")
        self._use_hermes_home(tmp_path, monkeypatch, store)

        assert _read_codex_access_token() == "tok-123"

    def test_pool_without_selected_entry_falls_back_to_auth_store(self, tmp_path, monkeypatch):
        self._use_hermes_home(tmp_path, monkeypatch)
        valid_jwt = "eyJhbGciOiJSUzI1NiJ9.eyJleHAiOjk5OTk5OTk5OTl9.sig"
        stored_tokens = {
            "tokens": {"access_token": valid_jwt, "refresh_token": "refresh"}
        }

        with (
            patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)),
            patch("hermes_cli.auth._read_codex_tokens", return_value=stored_tokens),
        ):
            token = _read_codex_access_token()

        assert token == valid_jwt

    def test_missing_returns_none(self, tmp_path, monkeypatch):
        self._use_hermes_home(tmp_path, monkeypatch, {"version": 1, "providers": {}})

        with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            token = _read_codex_access_token()

        assert token is None

    def test_empty_token_returns_none(self, tmp_path, monkeypatch):
        # The stored access token is a single space.
        self._use_hermes_home(tmp_path, monkeypatch, self._codex_store(" "))

        assert _read_codex_access_token() is None

    def test_malformed_json_returns_none(self, tmp_path):
        legacy_dir = tmp_path / ".codex"
        legacy_dir.mkdir()
        (legacy_dir / "auth.json").write_text("{bad json")

        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            token = _read_codex_access_token()

        assert token is None

    def test_missing_tokens_key_returns_none(self, tmp_path):
        legacy_dir = tmp_path / ".codex"
        legacy_dir.mkdir()
        (legacy_dir / "auth.json").write_text(json.dumps({"other": "data"}))

        with patch("agent.auxiliary_client.Path.home", return_value=tmp_path):
            token = _read_codex_access_token()

        assert token is None

    def test_expired_jwt_returns_none(self, tmp_path, monkeypatch):
        """A JWT whose ``exp`` lies in the past must yield ``None``."""
        import time as _time

        # exp one hour in the past
        expired_jwt = self._fake_jwt({"exp": int(_time.time()) - 3600})
        self._use_hermes_home(tmp_path, monkeypatch, self._codex_store(expired_jwt))

        with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            token = _read_codex_access_token()

        assert token is None, "Expired JWT should return None"

    def test_valid_jwt_returns_token(self, tmp_path, monkeypatch):
        """A JWT whose ``exp`` lies in the future is returned unchanged."""
        import time as _time

        valid_jwt = self._fake_jwt({"exp": int(_time.time()) + 3600})
        self._use_hermes_home(tmp_path, monkeypatch, self._codex_store(valid_jwt))

        assert _read_codex_access_token() == valid_jwt

    def test_non_jwt_token_passes_through(self, tmp_path, monkeypatch):
        """A token without dots is returned as stored."""
        self._use_hermes_home(
            tmp_path, monkeypatch, self._codex_store("plain-token-no-jwt")
        )

        assert _read_codex_access_token() == "plain-token-no-jwt"
class TestAnthropicOAuthFlag:
    """Verify the ``_is_oauth`` flag on the auxiliary Anthropic client."""

    def test_oauth_token_sets_flag(self, monkeypatch):
        """An ``sk-ant-oat01-*`` token in ANTHROPIC_TOKEN gives ``_is_oauth`` True."""
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-test-token")

        with patch("agent.anthropic_adapter.build_anthropic_client") as build_stub:
            build_stub.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient

            aux_client, _model = _try_anthropic()

        assert aux_client is not None
        assert isinstance(aux_client, AnthropicAuxiliaryClient)
        # The flag is read from the adapter exposed as chat.completions.
        completions_adapter = aux_client.chat.completions
        assert completions_adapter._is_oauth is True

    def test_api_key_no_oauth_flag(self, monkeypatch):
        """An ``sk-ant-api*`` key from the resolver gives ``_is_oauth`` False."""
        with (
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api03-testkey1234"),
            patch("agent.anthropic_adapter.build_anthropic_client") as build_stub,
            patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)),
        ):
            build_stub.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic, AnthropicAuxiliaryClient

            aux_client, _model = _try_anthropic()

        assert aux_client is not None
        assert isinstance(aux_client, AnthropicAuxiliaryClient)
        completions_adapter = aux_client.chat.completions
        assert completions_adapter._is_oauth is False

    def test_pool_entry_takes_priority_over_legacy_resolution(self):
        """A pooled credential is used and the legacy token resolver is not called."""

        class _PooledEntry:
            access_token = "sk-ant-oat01-pooled"
            base_url = "https://api.anthropic.com"

        class _StubPool:
            def has_credentials(self):
                return True

            def select(self):
                return _PooledEntry()

        # The legacy resolver raises if it is reached at all.
        legacy_guard = AssertionError("legacy path should not run")

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_StubPool()),
            patch("agent.anthropic_adapter.resolve_anthropic_token", side_effect=legacy_guard),
            patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()) as build_stub,
        ):
            from agent.auxiliary_client import _try_anthropic

            aux_client, chosen_model = _try_anthropic()

        assert aux_client is not None
        assert chosen_model == "claude-haiku-4-5-20251001"
        assert build_stub.call_args.args[0] == "sk-ant-oat01-pooled"
class TestTryCodex:
    """Checks for ``_try_codex`` credential selection."""

    def test_pool_without_selected_entry_falls_back_to_auth_store(self):
        """With no pool entry selected, the auth-store token reaches ``OpenAI``."""
        with (
            patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)),
            patch("agent.auxiliary_client._read_codex_access_token", return_value="codex-auth-token"),
            patch("agent.auxiliary_client.OpenAI") as openai_stub,
        ):
            openai_stub.return_value = MagicMock()
            from agent.auxiliary_client import _try_codex

            aux_client, chosen_model = _try_codex()

        assert aux_client is not None
        assert chosen_model == "gpt-5.2-codex"

        ctor_kwargs = openai_stub.call_args.kwargs
        assert ctor_kwargs["api_key"] == "codex-auth-token"
        assert ctor_kwargs["base_url"] == "https://chatgpt.com/backend-api/codex"
class TestExpiredCodexFallback:
    """Expired or unusual Codex tokens must not block provider resolution.

    The class also holds a few Anthropic OAuth-flag checks and JWT
    pass-through checks for ``_read_codex_access_token``.
    """

    # Header segment shared by most fake JWTs built in this class.
    _JWT_HEADER = b'{"alg":"RS256","typ":"JWT"}'

    @staticmethod
    def _b64url(raw):
        """Return *raw* as URL-safe base64 text with ``=`` padding stripped."""
        import base64

        return base64.urlsafe_b64encode(raw).rstrip(b"=").decode()

    @classmethod
    def _fake_jwt(cls, payload, header=None):
        """Build an unsigned JWT-shaped string from raw payload/header bytes."""
        if header is None:
            header = cls._JWT_HEADER
        return f"{cls._b64url(header)}.{cls._b64url(payload)}.fakesig"

    @staticmethod
    def _install_codex_token(tmp_path, monkeypatch, access_token):
        """Write an auth store holding *access_token* and export HERMES_HOME."""
        hermes_home = tmp_path / "hermes"
        hermes_home.mkdir(parents=True, exist_ok=True)
        (hermes_home / "auth.json").write_text(json.dumps({
            "version": 1,
            "providers": {
                "openai-codex": {
                    "tokens": {"access_token": access_token, "refresh_token": "r"},
                },
            },
        }))
        monkeypatch.setenv("HERMES_HOME", str(hermes_home))

    @classmethod
    def _install_expired_codex_token(cls, tmp_path, monkeypatch):
        """Install a Codex JWT whose ``exp`` claim is one hour in the past."""
        import time as _time

        claims = json.dumps({"exp": int(_time.time()) - 3600}).encode()
        cls._install_codex_token(tmp_path, monkeypatch, cls._fake_jwt(claims))

    def test_expired_codex_falls_through_to_next(self, tmp_path, monkeypatch):
        """When Codex token is expired, auto chain should skip it and try next provider."""
        self._install_expired_codex_token(tmp_path, monkeypatch)
        # Set up Anthropic as fallback
        monkeypatch.setenv("ANTHROPIC_TOKEN", "sk-ant-oat01-test-fallback")

        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
            mock_build.return_value = MagicMock()
            client, _model = _resolve_auto()

        # Only checks that some provider was found; its type is not asserted.
        assert client is not None, "Should find a provider after expired Codex"

    def test_expired_codex_openrouter_wins(self, tmp_path, monkeypatch):
        """With expired Codex + OpenRouter key, OpenRouter should win (1st in chain)."""
        self._install_expired_codex_token(tmp_path, monkeypatch)
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-test-key")

        with patch("agent.auxiliary_client.OpenAI") as mock_openai:
            mock_openai.return_value = MagicMock()
            client, _model = _resolve_auto()

        assert client is not None
        # OpenRouter is 1st in chain, should win
        mock_openai.assert_called()

    def test_expired_codex_custom_endpoint_wins(self, tmp_path, monkeypatch):
        """With expired Codex + custom endpoint (Ollama), custom should win (3rd in chain)."""
        self._install_expired_codex_token(tmp_path, monkeypatch)

        # Simulate Ollama or custom endpoint
        with (
            patch(
                "agent.auxiliary_client._resolve_custom_runtime",
                return_value=("http://localhost:11434/v1", "sk-dummy"),
            ),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            mock_openai.return_value = MagicMock()
            client, _model = _resolve_auto()

        assert client is not None

    def test_hermes_oauth_file_sets_oauth_flag(self, monkeypatch):
        """OAuth-style tokens should get is_oauth=True (token is not sk-ant-api-*)."""
        # Mock resolve_anthropic_token to return an OAuth-style token
        with (
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat-hermes-token"),
            patch("agent.anthropic_adapter.build_anthropic_client") as mock_build,
            patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)),
        ):
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic

            client, _model = _try_anthropic()

        assert client is not None, "Should resolve token"
        adapter = client.chat.completions
        assert adapter._is_oauth is True, "Non-sk-ant-api token should set is_oauth=True"

    def test_jwt_missing_exp_passes_through(self, tmp_path, monkeypatch):
        """JWT with valid JSON but no exp claim should pass through."""
        payload = json.dumps({"sub": "user123"}).encode()  # no exp
        no_exp_jwt = self._fake_jwt(payload)
        self._install_codex_token(tmp_path, monkeypatch, no_exp_jwt)

        result = _read_codex_access_token()

        assert result == no_exp_jwt, "JWT without exp should pass through"

    def test_jwt_invalid_json_payload_passes_through(self, tmp_path, monkeypatch):
        """JWT with valid base64 but invalid JSON payload should pass through."""
        bad_jwt = self._fake_jwt(b"not-json-content", header=b'{"alg":"RS256"}')
        self._install_codex_token(tmp_path, monkeypatch, bad_jwt)

        result = _read_codex_access_token()

        assert result == bad_jwt, "JWT with invalid JSON payload should pass through"

    def test_claude_code_oauth_env_sets_flag(self, monkeypatch):
        """CLAUDE_CODE_OAUTH_TOKEN env var should get is_oauth=True."""
        monkeypatch.setenv("CLAUDE_CODE_OAUTH_TOKEN", "sk-ant-oat-cc-test-token")
        monkeypatch.delenv("ANTHROPIC_TOKEN", raising=False)

        with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build:
            mock_build.return_value = MagicMock()
            from agent.auxiliary_client import _try_anthropic

            client, _model = _try_anthropic()

        assert client is not None
        adapter = client.chat.completions
        assert adapter._is_oauth is True
class TestExplicitProviderRouting:
    """Checks for ``resolve_provider_client`` with an explicitly named provider."""

    def test_explicit_anthropic_api_key(self, monkeypatch):
        """``"anthropic"`` with an ``sk-ant-api-*`` key gives ``_is_oauth`` False."""
        with (
            patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-api-regular-key"),
            patch("agent.anthropic_adapter.build_anthropic_client") as build_stub,
            patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)),
        ):
            build_stub.return_value = MagicMock()
            aux_client, _model = resolve_provider_client("anthropic")

        assert aux_client is not None
        completions_adapter = aux_client.chat.completions
        assert completions_adapter._is_oauth is False

    def test_explicit_openrouter_pool_exhausted_logs_precise_warning(self, monkeypatch, caplog):
        """Pool present but empty: warn about the pool, not about the env var."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)

        with patch("agent.auxiliary_client._select_pool_entry", return_value=(True, None)):
            with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
                aux_client, chosen_model = resolve_provider_client("openrouter")

        assert aux_client is None
        assert chosen_model is None

        pool_warning_seen = any(
            "credential pool has no usable entries" in entry.message
            for entry in caplog.records
        )
        assert pool_warning_seen

        env_warning_seen = any(
            "OPENROUTER_API_KEY not set" in entry.message
            for entry in caplog.records
        )
        assert not env_warning_seen

    def test_explicit_openrouter_missing_env_keeps_not_set_warning(self, monkeypatch, caplog):
        """No pool and no env var: the 'not set' warning is logged."""
        monkeypatch.delenv("OPENROUTER_API_KEY", raising=False)

        with patch("agent.auxiliary_client._select_pool_entry", return_value=(False, None)):
            with caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"):
                aux_client, chosen_model = resolve_provider_client("openrouter")

        assert aux_client is None
        assert chosen_model is None

        env_warning_seen = any(
            "OPENROUTER_API_KEY not set" in entry.message
            for entry in caplog.records
        )
        assert env_warning_seen
class TestGetTextAuxiliaryClient:
    """Resolution-chain checks; the visible test exercises ``_try_codex``."""

    def test_codex_pool_entry_takes_priority_over_auth_store(self):
        """A pooled Codex credential is used and the legacy token store is not read."""

        class _PooledEntry:
            access_token = "pooled-codex-token"
            base_url = "https://chatgpt.com/backend-api/codex"

        class _StubPool:
            def has_credentials(self):
                return True

            def select(self):
                return _PooledEntry()

        # Reading the legacy store raises, failing the test if it is reached.
        legacy_guard = AssertionError("legacy codex store should not run")

        with (
            patch("agent.auxiliary_client.load_pool", return_value=_StubPool()),
            patch("agent.auxiliary_client.OpenAI"),
            patch("hermes_cli.auth._read_codex_tokens", side_effect=legacy_guard),
        ):
            from agent.auxiliary_client import _try_codex

            aux_client, chosen_model = _try_codex()

        from agent.auxiliary_client import CodexAuxiliaryClient

        assert isinstance(aux_client, CodexAuxiliaryClient)
        assert chosen_model == "gpt-5.2-codex"
class TestNousAuxiliaryRefresh:
    """Checks for ``_try_nous`` model/credential selection and 401 retry."""

    # Base URL used for every Nous client in these tests.
    _NOUS_BASE_URL = "https://inference-api.nousresearch.com/v1"

    class _Auth401(Exception):
        """Exception carrying ``status_code = 401``."""

        status_code = 401

    def test_try_nous_prefers_runtime_credentials(self):
        """The runtime-resolved key and base URL are passed to ``OpenAI``."""
        runtime_creds = ("fresh-agent-key", self._NOUS_BASE_URL)

        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=runtime_creds),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
            patch("agent.auxiliary_client.OpenAI") as openai_stub,
        ):
            from agent.auxiliary_client import _try_nous

            openai_stub.return_value = MagicMock()
            aux_client, chosen_model = _try_nous()

        assert aux_client is not None
        # The recommendation lookup returned None, so this is the default model.
        assert chosen_model == "google/gemini-3-flash-preview"

        ctor_kwargs = openai_stub.call_args.kwargs
        assert ctor_kwargs["api_key"] == "fresh-agent-key"
        assert ctor_kwargs["base_url"] == self._NOUS_BASE_URL

    def test_try_nous_uses_portal_recommendation_for_text(self):
        """A recommended model is returned and requested with ``vision=False``."""
        runtime_creds = ("fresh-agent-key", self._NOUS_BASE_URL)

        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=runtime_creds),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="minimax/minimax-m2.7") as recommend_stub,
            patch("agent.auxiliary_client.OpenAI") as openai_stub,
        ):
            from agent.auxiliary_client import _try_nous

            openai_stub.return_value = MagicMock()
            aux_client, chosen_model = _try_nous(vision=False)

        assert aux_client is not None
        assert chosen_model == "minimax/minimax-m2.7"
        assert recommend_stub.call_args.kwargs["vision"] is False

    def test_try_nous_uses_portal_recommendation_for_vision(self):
        """With ``vision=True`` the lookup is called with ``vision=True``."""
        runtime_creds = ("fresh-agent-key", self._NOUS_BASE_URL)

        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=runtime_creds),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as recommend_stub,
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            aux_client, chosen_model = _try_nous(vision=True)

        assert aux_client is not None
        assert chosen_model == "google/gemini-3-flash-preview"
        assert recommend_stub.call_args.kwargs["vision"] is True

    def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
        """A raising recommendation lookup still yields a client and a model."""
        runtime_creds = ("fresh-agent-key", self._NOUS_BASE_URL)
        lookup_failure = RuntimeError("portal down")

        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=runtime_creds),
            patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=lookup_failure),
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            aux_client, chosen_model = _try_nous()

        assert aux_client is not None
        assert chosen_model == "google/gemini-3-flash-preview"

    def test_call_llm_retries_nous_after_401(self):
        """After a 401 from the cached client, ``call_llm`` succeeds on a new one."""
        stale = MagicMock()
        stale.base_url = self._NOUS_BASE_URL
        stale.chat.completions.create.side_effect = self._Auth401("stale nous key")

        fresh = MagicMock()
        fresh.base_url = self._NOUS_BASE_URL
        fresh.chat.completions.create.return_value = {"ok": True}

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale, "nous-model")),
            patch("agent.auxiliary_client.OpenAI", return_value=fresh),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", self._NOUS_BASE_URL)),
        ):
            outcome = call_llm(
                task="compression",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert outcome == {"ok": True}
        # Exactly one attempt on each client.
        assert stale.chat.completions.create.call_count == 1
        assert fresh.chat.completions.create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_call_llm_retries_nous_after_401(self):
        """Async counterpart: one awaited 401, then one awaited success."""
        stale = MagicMock()
        stale.base_url = self._NOUS_BASE_URL
        stale.chat.completions.create = AsyncMock(side_effect=self._Auth401("stale nous key"))

        fresh_async = MagicMock()
        fresh_async.base_url = self._NOUS_BASE_URL
        fresh_async.chat.completions.create = AsyncMock(return_value={"ok": True})

        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale, "nous-model")),
            patch("agent.auxiliary_client._to_async_client", return_value=(fresh_async, "nous-model")),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", self._NOUS_BASE_URL)),
        ):
            outcome = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
            )

        assert outcome == {"ok": True}
        assert stale.chat.completions.create.await_count == 1
        assert fresh_async.chat.completions.create.await_count == 1
# ── Payment / credit exhaustion fallback ─────────────────────────────────
class TestIsPaymentError:
    """Classification checks for ``_is_payment_error``."""

    @staticmethod
    def _error(message, status_code=None):
        """Build an ``Exception``; attach ``status_code`` only when one is given."""
        err = Exception(message)
        if status_code is not None:
            err.status_code = status_code
        return err

    def test_402_status_code(self):
        err = self._error("Payment Required", 402)
        assert _is_payment_error(err) is True

    def test_402_with_credits_message(self):
        err = self._error(
            "You requested up to 65535 tokens, but can only afford 8029", 402
        )
        assert _is_payment_error(err) is True

    def test_429_with_credits_message(self):
        err = self._error("insufficient credits remaining", 429)
        assert _is_payment_error(err) is True

    def test_429_without_credits_message_is_not_payment(self):
        """A plain rate-limit message with status 429 is not a payment error."""
        err = self._error("Rate limit exceeded, try again in 2 seconds", 429)
        assert _is_payment_error(err) is False

    def test_generic_500_is_not_payment(self):
        err = self._error("Internal server error", 500)
        assert _is_payment_error(err) is False

    def test_no_status_code_with_billing_message(self):
        # No status_code attribute at all; only the message is available.
        err = self._error("billing: payment required for this request")
        assert _is_payment_error(err) is True

    def test_no_status_code_no_message(self):
        err = self._error("connection reset")
        assert _is_payment_error(err) is False
class TestGetProviderChain:
    """Checks the ``(label, function)`` pairs returned by ``_get_provider_chain``."""

    def test_returns_five_entries(self):
        """The chain has five entries with the expected labels, in order."""
        chain = _get_provider_chain()
        assert len(chain) == 5

        labels = []
        for name, _resolver in chain:
            labels.append(name)
        assert labels == ["openrouter", "nous", "local/custom", "openai-codex", "api-key"]

    def test_picks_up_patched_functions(self):
        """A patched ``_try_openrouter`` appears in a chain built afterwards."""

        def sentinel():
            return ("patched", "model")

        with patch("agent.auxiliary_client._try_openrouter", sentinel):
            chain = _get_provider_chain()

        assert chain[0] == ("openrouter", sentinel)
class TestTryPaymentFallback:
    """Checks which alternative ``_try_payment_fallback`` returns."""

    def test_skips_failed_provider(self):
        """With OpenRouter failed, the Nous client is returned."""
        nous_client = MagicMock()

        with (
            patch("agent.auxiliary_client._try_openrouter", return_value=(None, None)),
            patch("agent.auxiliary_client._try_nous", return_value=(nous_client, "nous-model")),
            patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"),
        ):
            found_client, found_model, found_label = _try_payment_fallback(
                "openrouter", task="compression"
            )

        assert found_client is nous_client
        assert found_model == "nous-model"
        assert found_label == "nous"

    def test_returns_none_when_no_fallback(self):
        """With every patched provider returning nothing, the result is empty."""
        nothing = (None, None)

        with (
            patch("agent.auxiliary_client._try_openrouter", return_value=nothing),
            patch("agent.auxiliary_client._try_nous", return_value=nothing),
            patch("agent.auxiliary_client._try_custom_endpoint", return_value=nothing),
            patch("agent.auxiliary_client._try_codex", return_value=nothing),
            patch("agent.auxiliary_client._resolve_api_key_provider", return_value=nothing),
            patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"),
        ):
            found_client, _found_model, found_label = _try_payment_fallback("openrouter")

        assert found_client is None
        assert found_label == ""

    def test_codex_alias_maps_to_chain_label(self):
        """With ``openai-codex`` failed, the OpenRouter client is returned."""
        openrouter_client = MagicMock()

        with (
            patch("agent.auxiliary_client._try_openrouter", return_value=(openrouter_client, "or-model")),
            patch("agent.auxiliary_client._try_codex", return_value=(None, None)),
            patch("agent.auxiliary_client._read_main_provider", return_value="openai-codex"),
        ):
            found_client, _found_model, found_label = _try_payment_fallback(
                "openai-codex", task="vision"
            )

        assert found_client is openrouter_client
        assert found_label == "openrouter"

    def test_skips_to_codex_when_or_and_nous_fail(self):
        """Codex is returned when OpenRouter, Nous and the custom endpoint yield nothing."""
        codex_client = MagicMock()
        nothing = (None, None)

        with (
            patch("agent.auxiliary_client._try_openrouter", return_value=nothing),
            patch("agent.auxiliary_client._try_nous", return_value=nothing),
            patch("agent.auxiliary_client._try_custom_endpoint", return_value=nothing),
            patch("agent.auxiliary_client._try_codex", return_value=(codex_client, "gpt-5.2-codex")),
            patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"),
        ):
            found_client, found_model, found_label = _try_payment_fallback("openrouter")

        assert found_client is codex_client
        assert found_model == "gpt-5.2-codex"
        assert found_label == "openai-codex"
class TestCallLlmPaymentFallback:
    """Checks how ``call_llm`` reacts to provider errors."""

    def _make_402_error(self, msg="Payment Required: insufficient credits"):
        """Return an ``Exception`` with *msg* and ``status_code`` set to 402."""
        payment_error = Exception(msg)
        payment_error.status_code = 402
        return payment_error

    def test_non_payment_error_not_caught(self, monkeypatch):
        """A 500 raised by the primary client propagates out of ``call_llm``."""
        monkeypatch.setenv("OPENROUTER_API_KEY", "or-key")

        failure = Exception("Internal Server Error")
        failure.status_code = 500
        primary = MagicMock()
        primary.chat.completions.create.side_effect = failure

        with (
            patch(
                "agent.auxiliary_client._get_cached_client",
                return_value=(primary, "google/gemini-3-flash-preview"),
            ),
            patch(
                "agent.auxiliary_client._resolve_task_provider_model",
                return_value=("auto", "google/gemini-3-flash-preview", None, None, None),
            ),
        ):
            with pytest.raises(Exception, match="Internal Server Error"):
                call_llm(
                    task="compression",
                    messages=[{"role": "user", "content": "hello"}],
                )
# ---------------------------------------------------------------------------
# Gate: _resolve_api_key_provider must skip anthropic when not configured
# ---------------------------------------------------------------------------
def test_resolve_api_key_provider_skips_unconfigured_anthropic(monkeypatch):
    """``_try_anthropic`` must not run when anthropic is not explicitly configured."""
    from collections import OrderedDict
    from hermes_cli.auth import ProviderConfig

    # The registry holds only "anthropic", so no other provider can be
    # handled first.
    anthropic_config = ProviderConfig(
        id="anthropic",
        name="Anthropic",
        auth_type="api_key",
        inference_base_url="https://api.anthropic.com",
        api_key_env_vars=("ANTHROPIC_API_KEY",),
    )
    fake_registry = OrderedDict([("anthropic", anthropic_config)])

    invocations = []

    def mock_try_anthropic():
        # Record the call so the assertion below can detect it.
        invocations.append("anthropic")
        return None, None

    def _never_configured(pid):
        return False

    monkeypatch.setattr("agent.auxiliary_client._try_anthropic", mock_try_anthropic)
    monkeypatch.setattr("hermes_cli.auth.PROVIDER_REGISTRY", fake_registry)
    monkeypatch.setattr(
        "hermes_cli.auth.is_provider_explicitly_configured",
        _never_configured,
    )

    from agent.auxiliary_client import _resolve_api_key_provider

    _resolve_api_key_provider()

    assert "anthropic" not in invocations, \
        "_try_anthropic() should not be called when anthropic is not explicitly configured"
# ---------------------------------------------------------------------------
# model="default" elimination (#7512)
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# _try_payment_fallback reason parameter (#7512 bug 3)
# ---------------------------------------------------------------------------
# ---------------------------------------------------------------------------
# _is_connection_error coverage
# ---------------------------------------------------------------------------
class TestIsConnectionError:
    """Classification checks for ``_is_connection_error``."""

    @staticmethod
    def _classify(message, status_code=None):
        """Run ``_is_connection_error`` on an ``Exception`` built from the arguments.

        ``status_code`` is attached to the exception only when one is given.
        """
        from agent.auxiliary_client import _is_connection_error

        err = Exception(message)
        if status_code is not None:
            err.status_code = status_code
        return _is_connection_error(err)

    def test_connection_refused(self):
        assert self._classify("Connection refused") is True

    def test_timeout(self):
        assert self._classify("Request timed out.") is True

    def test_dns_failure(self):
        assert self._classify("Name or service not known") is True

    def test_normal_api_error_not_connection(self):
        assert self._classify("Bad Request: invalid model", 400) is False

    def test_500_not_connection(self):
        assert self._classify("Internal Server Error", 500) is False
class TestKimiTemperatureOmitted:
    """Kimi/Moonshot requests must be sent WITHOUT a ``temperature`` kwarg.

    The Kimi gateway picks the temperature itself depending on the active
    mode (thinking → 1.0, non-thinking → 0.6), so any client-supplied value
    conflicts with the gateway-managed default.
    """

    @pytest.mark.parametrize(
        "model",
        [
            "kimi-for-coding",
            "kimi-k2.5",
            "kimi-k2.6",
            "kimi-k2-turbo-preview",
            "kimi-k2-0905-preview",
            "kimi-k2-thinking",
            "kimi-k2-thinking-turbo",
            "kimi-k2-instruct",
            "kimi-k2-instruct-0905",
            "moonshotai/kimi-k2.5",
            "moonshotai/Kimi-K2-Thinking",
            "moonshotai/Kimi-K2-Instruct",
        ],
    )
    def test_kimi_models_omit_temperature(self, model):
        """Every listed Kimi model yields kwargs with no ``temperature`` key."""
        from agent.auxiliary_client import _build_call_kwargs

        prompt = [{"role": "user", "content": "hello"}]
        built = _build_call_kwargs(
            provider="kimi-coding",
            model=model,
            messages=prompt,
            temperature=0.3,
        )

        assert "temperature" not in built

    def test_kimi_for_coding_no_temperature_when_none(self):
        """Passing ``temperature=None`` also produces no ``temperature`` key."""
        from agent.auxiliary_client import _build_call_kwargs

        prompt = [{"role": "user", "content": "hello"}]
        built = _build_call_kwargs(
            provider="kimi-coding",
            model="kimi-for-coding",
            messages=prompt,
            temperature=None,
        )

        assert "temperature" not in built

    def test_sync_call_omits_temperature(self):
        """``call_llm`` does not pass ``temperature`` to a Kimi client."""
        expected = MagicMock()
        kimi_client = MagicMock()
        kimi_client.base_url = "https://api.kimi.com/coding/v1"
        kimi_client.chat.completions.create.return_value = expected

        with (
            patch(
                "agent.auxiliary_client._get_cached_client",
                return_value=(kimi_client, "kimi-for-coding"),
            ),
            patch(
                "agent.auxiliary_client._resolve_task_provider_model",
                return_value=("auto", "kimi-for-coding", None, None, None),
            ),
        ):
            outcome = call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hello"}],
                temperature=0.1,
            )

        assert outcome is expected
        sent = kimi_client.chat.completions.create.call_args.kwargs
        assert sent["model"] == "kimi-for-coding"
        assert "temperature" not in sent

    @pytest.mark.asyncio
    async def test_async_call_omits_temperature(self):
        """``async_call_llm`` does not pass ``temperature`` to a Kimi client."""
        expected = MagicMock()
        kimi_client = MagicMock()
        kimi_client.base_url = "https://api.kimi.com/coding/v1"
        kimi_client.chat.completions.create = AsyncMock(return_value=expected)

        with (
            patch(
                "agent.auxiliary_client._get_cached_client",
                return_value=(kimi_client, "kimi-for-coding"),
            ),
            patch(
                "agent.auxiliary_client._resolve_task_provider_model",
                return_value=("auto", "kimi-for-coding", None, None, None),
            ),
        ):
            outcome = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hello"}],
                temperature=0.1,
            )

        assert outcome is expected
        sent = kimi_client.chat.completions.create.call_args.kwargs
        assert sent["model"] == "kimi-for-coding"
        assert "temperature" not in sent

    @pytest.mark.parametrize(
        "model",
        [
            "anthropic/claude-sonnet-4-6",
            "gpt-5.4",
            "deepseek-chat",
        ],
    )
    def test_non_kimi_models_preserve_temperature(self, model):
        """Non-Kimi models keep the caller-supplied temperature untouched."""
        from agent.auxiliary_client import _build_call_kwargs

        prompt = [{"role": "user", "content": "hello"}]
        built = _build_call_kwargs(
            provider="openrouter",
            model=model,
            messages=prompt,
            temperature=0.3,
        )

        assert built["temperature"] == 0.3

    @pytest.mark.parametrize(
        "base_url",
        [
            "https://api.moonshot.ai/v1",
            "https://api.moonshot.cn/v1",
            "https://api.kimi.com/coding/v1",
        ],
    )
    def test_kimi_k2_5_omits_temperature_regardless_of_endpoint(self, base_url):
        """``kimi-k2.5`` drops temperature on each of the listed base URLs."""
        from agent.auxiliary_client import _build_call_kwargs

        prompt = [{"role": "user", "content": "hello"}]
        built = _build_call_kwargs(
            provider="kimi-coding",
            model="kimi-k2.5",
            messages=prompt,
            temperature=0.1,
            base_url=base_url,
        )

        assert "temperature" not in built
# ---------------------------------------------------------------------------
# Stale OPENAI_BASE_URL warning in _resolve_auto() (#5161)
# ---------------------------------------------------------------------------
class TestStaleBaseUrlWarning:
    """_resolve_auto() warns when OPENAI_BASE_URL conflicts with config provider (#5161).

    All four stale-base-URL cases live in this class: one positive case where
    the warning must fire, and three negative cases (``custom`` provider,
    ``custom:<name>`` provider, and no ``OPENAI_BASE_URL`` at all) where it
    must stay silent.  Every test resets the module-level
    ``_stale_base_url_warned`` flag first so it starts from the un-warned state.
    """

    def test_warns_when_openai_base_url_set_with_named_provider(self, monkeypatch, caplog):
        """Warning fires when OPENAI_BASE_URL is set but provider is a named provider."""
        import agent.auxiliary_client as mod

        # Reset the module-level flag so the warning fires
        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")

        with (
            patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"),
            patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"),
            caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"),
        ):
            _resolve_auto()

        assert any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
            "Expected a warning about stale OPENAI_BASE_URL"
        # The flag must be flipped once the warning has been emitted.
        assert mod._stale_base_url_warned is True

    def test_no_warning_when_provider_is_custom(self, monkeypatch, caplog):
        """No warning when the provider is 'custom' — OPENAI_BASE_URL is expected."""
        import agent.auxiliary_client as mod

        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")

        with (
            patch("agent.auxiliary_client._read_main_provider", return_value="custom"),
            patch("agent.auxiliary_client._read_main_model", return_value="llama3"),
            patch(
                "agent.auxiliary_client._resolve_custom_runtime",
                return_value=("http://localhost:11434/v1", "test-key", None),
            ),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
            caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"),
        ):
            # Stub out client construction; only the logging side is checked.
            mock_openai.return_value = MagicMock()
            _resolve_auto()

        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
            "Should NOT warn when provider is 'custom'"

    def test_no_warning_when_provider_is_named_custom(self, monkeypatch, caplog):
        """No warning when the provider is 'custom:myname' — base_url comes from config."""
        import agent.auxiliary_client as mod

        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
        monkeypatch.setenv("OPENAI_BASE_URL", "http://localhost:11434/v1")
        monkeypatch.setenv("OPENAI_API_KEY", "test-key")

        with (
            patch("agent.auxiliary_client._read_main_provider", return_value="custom:ollama-local"),
            patch("agent.auxiliary_client._read_main_model", return_value="llama3"),
            patch(
                "agent.auxiliary_client.resolve_provider_client",
                return_value=(MagicMock(), "llama3"),
            ),
            caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"),
        ):
            _resolve_auto()

        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
            "Should NOT warn when provider is 'custom:*'"

    def test_no_warning_when_openai_base_url_not_set(self, monkeypatch, caplog):
        """No warning when OPENAI_BASE_URL is absent."""
        import agent.auxiliary_client as mod

        monkeypatch.setattr(mod, "_stale_base_url_warned", False)
        monkeypatch.delenv("OPENAI_BASE_URL", raising=False)
        monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-test")

        with (
            patch("agent.auxiliary_client._read_main_provider", return_value="openrouter"),
            patch("agent.auxiliary_client._read_main_model", return_value="google/gemini-flash"),
            caplog.at_level(logging.WARNING, logger="agent.auxiliary_client"),
        ):
            _resolve_auto()

        assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
            "Should NOT warn when OPENAI_BASE_URL is not set"


class TestAuxiliaryUnsupportedParameterRetry:
    """call_llm / async_call_llm retry without ``temperature`` when it is rejected.

    The fake client raises an "Unsupported parameter: temperature" error on
    the first ``create`` call and succeeds on the second; the tests inspect
    the kwargs recorded for each attempt.
    """

    def test_sync_call_retries_without_temperature_when_endpoint_rejects_it(self):
        """Sync path: second attempt drops ``temperature`` but keeps ``max_tokens``."""
        client = MagicMock()
        client.base_url = "https://chatgpt.com/backend-api/codex/"
        response = MagicMock()
        response.choices = [MagicMock(message=MagicMock(content="ok"))]
        calls = []

        def create(**kwargs):
            # Snapshot the kwargs of every attempt; fail only the first one.
            calls.append(dict(kwargs))
            if len(calls) == 1:
                raise Exception(
                    "HTTP 400: {'detail': 'Unsupported parameter: temperature'}"
                )
            return response

        client.chat.completions.create.side_effect = create

        with patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ):
            result = call_llm(
                task="flush_memories",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=32,
            )

        assert result is response
        assert calls[0]["temperature"] == 0.3
        assert "temperature" not in calls[1]
        assert calls[1]["max_tokens"] == 32

    @pytest.mark.asyncio
    async def test_async_call_retries_without_temperature_when_endpoint_rejects_it(self):
        """Async path: second attempt drops ``temperature`` but keeps ``max_tokens``."""
        client = MagicMock()
        client.base_url = "https://chatgpt.com/backend-api/codex/"
        response = MagicMock()
        response.choices = [MagicMock(message=MagicMock(content="ok"))]
        calls = []

        async def create(**kwargs):
            # Snapshot the kwargs of every attempt; fail only the first one.
            calls.append(dict(kwargs))
            if len(calls) == 1:
                raise Exception(
                    "HTTP 400: {'detail': 'Unsupported parameter: temperature'}"
                )
            return response

        client.chat.completions.create = AsyncMock(side_effect=create)

        with patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "gpt-5.5"),
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("openai-codex", "gpt-5.5", None, None, None),
        ):
            result = await async_call_llm(
                task="flush_memories",
                messages=[{"role": "user", "content": "hi"}],
                temperature=0.3,
                max_tokens=32,
            )

        assert result is response
        assert calls[0]["temperature"] == 0.3
        assert "temperature" not in calls[1]
        assert calls[1]["max_tokens"] == 32


class TestAuxiliaryTaskExtraBody:
    """Per-task ``extra_body`` from config is combined with the caller's ``extra_body``."""

    def test_sync_call_merges_task_extra_body_from_config(self):
        """Config-level and call-level ``extra_body`` keys both reach the client."""
        client = MagicMock()
        client.base_url = "https://api.example.com/v1"
        response = MagicMock()
        client.chat.completions.create.return_value = response
        config = {
            "auxiliary": {
                "session_search": {
                    "extra_body": {
                        "enable_thinking": False,
                        "reasoning": {"effort": "none"},
                    }
                }
            }
        }

        with patch("hermes_cli.config.load_config", return_value=config), patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "glm-4.5-air"),
        ):
            result = call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hello"}],
                extra_body={"metadata": {"source": "test"}},
            )

        assert result is response
        kwargs = client.chat.completions.create.call_args.kwargs
        assert kwargs["extra_body"]["enable_thinking"] is False
        assert kwargs["extra_body"]["reasoning"] == {"effort": "none"}
        assert kwargs["extra_body"]["metadata"] == {"source": "test"}

    @pytest.mark.asyncio
    async def test_async_call_explicit_extra_body_overrides_task_config(self):
        """A key passed explicitly by the caller wins over the same key in config."""
        client = MagicMock()
        client.base_url = "https://api.example.com/v1"
        response = MagicMock()
        client.chat.completions.create = AsyncMock(return_value=response)
        config = {
            "auxiliary": {
                "session_search": {
                    "extra_body": {"enable_thinking": False}
                }
            }
        }

        with patch("hermes_cli.config.load_config", return_value=config), patch(
            "agent.auxiliary_client._get_cached_client",
            return_value=(client, "glm-4.5-air"),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hello"}],
                extra_body={"enable_thinking": True},
            )

        assert result is response
        kwargs = client.chat.completions.create.call_args.kwargs
        assert kwargs["extra_body"]["enable_thinking"] is True
# ---------------------------------------------------------------------------
# Anthropic-compatible image block conversion
# ---------------------------------------------------------------------------
class TestAnthropicCompatImageConversion:
    """Cover ``_is_anthropic_compat_endpoint`` and ``_convert_openai_images_to_anthropic``."""

    def test_known_providers_detected(self):
        """Both MiniMax provider ids are detected even with an empty base URL."""
        from agent.auxiliary_client import _is_anthropic_compat_endpoint

        for provider_id in ("minimax", "minimax-cn"):
            assert _is_anthropic_compat_endpoint(provider_id, "")

    def test_openrouter_not_detected(self):
        """``openrouter`` and ``anthropic`` with an empty URL are not detected."""
        from agent.auxiliary_client import _is_anthropic_compat_endpoint

        for provider_id in ("openrouter", "anthropic"):
            assert not _is_anthropic_compat_endpoint(provider_id, "")

    def test_url_based_detection(self):
        """For ``custom`` providers the decision follows the base URL."""
        from agent.auxiliary_client import _is_anthropic_compat_endpoint

        compat_urls = (
            "https://api.minimax.io/anthropic",
            "https://example.com/anthropic/v1",
        )
        for url in compat_urls:
            assert _is_anthropic_compat_endpoint("custom", url)
        assert not _is_anthropic_compat_endpoint("custom", "https://api.openai.com/v1")

    def test_base64_image_converted(self):
        """A ``data:`` URL becomes a base64 image block with media type and payload."""
        from agent.auxiliary_client import _convert_openai_images_to_anthropic

        text_part = {"type": "text", "text": "describe"}
        image_part = {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,iVBOR="},
        }
        conversation = [{"role": "user", "content": [text_part, image_part]}]

        converted = _convert_openai_images_to_anthropic(conversation)

        block = converted[0]["content"][1]
        assert block["type"] == "image"
        assert block["source"]["type"] == "base64"
        assert block["source"]["media_type"] == "image/png"
        assert block["source"]["data"] == "iVBOR="

    def test_url_image_converted(self):
        """An http(s) image URL becomes a ``url``-sourced image block."""
        from agent.auxiliary_client import _convert_openai_images_to_anthropic

        image_part = {
            "type": "image_url",
            "image_url": {"url": "https://example.com/img.jpg"},
        }
        conversation = [{"role": "user", "content": [image_part]}]

        converted = _convert_openai_images_to_anthropic(conversation)

        block = converted[0]["content"][0]
        assert block["type"] == "image"
        assert block["source"]["type"] == "url"
        assert block["source"]["url"] == "https://example.com/img.jpg"

    def test_text_only_messages_unchanged(self):
        """A plain-string message is returned as the very same object."""
        from agent.auxiliary_client import _convert_openai_images_to_anthropic

        conversation = [{"role": "user", "content": "Hello"}]

        converted = _convert_openai_images_to_anthropic(conversation)

        assert converted[0] is conversation[0]  # same object, not copied

    def test_jpeg_media_type_parsed(self):
        """The media type is read from the ``data:`` URL prefix (JPEG case)."""
        from agent.auxiliary_client import _convert_openai_images_to_anthropic

        image_part = {
            "type": "image_url",
            "image_url": {"url": "data:image/jpeg;base64,/9j/="},
        }
        conversation = [{"role": "user", "content": [image_part]}]

        converted = _convert_openai_images_to_anthropic(conversation)

        assert converted[0]["content"][0]["source"]["media_type"] == "image/jpeg"
class _AuxAuth401(Exception):
status_code = 401
def __init__(self, message="Provided authentication token is expired"):
super().__init__(message)
class _DummyResponse:
def __init__(self, text="ok"):
self.choices = [MagicMock(message=MagicMock(content=text))]
class _FailingThenSuccessCompletions:
def __init__(self):
self.calls = 0
def create(self, **kwargs):
self.calls += 1
if self.calls == 1:
raise _AuxAuth401()
return _DummyResponse("sync-ok")
class _AsyncFailingThenSuccessCompletions:
def __init__(self):
self.calls = 0
async def create(self, **kwargs):
self.calls += 1
if self.calls == 1:
raise _AuxAuth401()
return _DummyResponse("async-ok")
class TestAuxiliaryAuthRefreshRetry:
def test_call_llm_refreshes_codex_on_401_for_vision(self):
    """Vision task: a 401 from Codex triggers one credential refresh and a retry.

    The vision resolver hands out the failing client first and the fresh
    client second; the final response must come from the fresh client.
    """
    codex_url = "https://chatgpt.com/backend-api/codex"
    codex_model = "gpt-5.2-codex"

    failing_client = MagicMock()
    failing_client.base_url = codex_url
    failing_client.chat.completions = _FailingThenSuccessCompletions()

    fresh_client = MagicMock()
    fresh_client.base_url = codex_url
    fresh_client.chat.completions.create.return_value = _DummyResponse("fresh-sync")

    resolver_results = [
        ("openai-codex", failing_client, codex_model),
        ("openai-codex", fresh_client, codex_model),
    ]

    with patch(
        "agent.auxiliary_client.resolve_vision_provider_client",
        side_effect=resolver_results,
    ), patch(
        "agent.auxiliary_client._refresh_provider_credentials",
        return_value=True,
    ) as refresh_spy:
        reply = call_llm(
            task="vision",
            provider="openai-codex",
            model=codex_model,
            messages=[{"role": "user", "content": "hi"}],
        )

    assert reply.choices[0].message.content == "fresh-sync"
    refresh_spy.assert_called_once_with("openai-codex")
def test_call_llm_refreshes_codex_on_401_for_non_vision(self):
    """Non-vision task: a 401 from Codex triggers one refresh and one retry.

    The cached-client lookup yields the stale client first and the fresh
    client second; each must be called exactly once.
    """
    codex_url = "https://chatgpt.com/backend-api/codex"
    codex_model = "gpt-5.2-codex"

    stale = MagicMock()
    stale.base_url = codex_url
    stale.chat.completions.create.side_effect = _AuxAuth401("stale codex token")

    fresh = MagicMock()
    fresh.base_url = codex_url
    fresh.chat.completions.create.return_value = _DummyResponse("fresh-non-vision")

    with patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("openai-codex", codex_model, None, None, None),
    ), patch(
        "agent.auxiliary_client._get_cached_client",
        side_effect=[(stale, codex_model), (fresh, codex_model)],
    ), patch(
        "agent.auxiliary_client._refresh_provider_credentials",
        return_value=True,
    ) as refresh_spy:
        reply = call_llm(
            task="compression",
            provider="openai-codex",
            model=codex_model,
            messages=[{"role": "user", "content": "hi"}],
        )

    assert reply.choices[0].message.content == "fresh-non-vision"
    refresh_spy.assert_called_once_with("openai-codex")
    assert stale.chat.completions.create.call_count == 1
    assert fresh.chat.completions.create.call_count == 1
def test_call_llm_refreshes_anthropic_on_401_for_non_vision(self):
    """Non-vision task: a 401 from Anthropic triggers one refresh and one retry.

    The cached-client lookup yields the stale client first and the fresh
    client second; each must be called exactly once.
    """
    anthropic_url = "https://api.anthropic.com"
    haiku_model = "claude-haiku-4-5-20251001"

    stale = MagicMock()
    stale.base_url = anthropic_url
    stale.chat.completions.create.side_effect = _AuxAuth401("anthropic token expired")

    fresh = MagicMock()
    fresh.base_url = anthropic_url
    fresh.chat.completions.create.return_value = _DummyResponse("fresh-anthropic")

    with patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("anthropic", haiku_model, None, None, None),
    ), patch(
        "agent.auxiliary_client._get_cached_client",
        side_effect=[(stale, haiku_model), (fresh, haiku_model)],
    ), patch(
        "agent.auxiliary_client._refresh_provider_credentials",
        return_value=True,
    ) as refresh_spy:
        reply = call_llm(
            task="compression",
            provider="anthropic",
            model=haiku_model,
            messages=[{"role": "user", "content": "hi"}],
        )

    assert reply.choices[0].message.content == "fresh-anthropic"
    refresh_spy.assert_called_once_with("anthropic")
    assert stale.chat.completions.create.call_count == 1
    assert fresh.chat.completions.create.call_count == 1
@pytest.mark.asyncio
async def test_async_call_llm_refreshes_codex_on_401_for_vision(self):
    """Async vision task: a 401 from Codex triggers one refresh and a retry.

    The vision resolver hands out the failing client first and the fresh
    client second; the final response must come from the fresh client.
    """
    codex_url = "https://chatgpt.com/backend-api/codex"
    codex_model = "gpt-5.2-codex"

    failing_client = MagicMock()
    failing_client.base_url = codex_url
    failing_client.chat.completions = _AsyncFailingThenSuccessCompletions()

    fresh_client = MagicMock()
    fresh_client.base_url = codex_url
    fresh_client.chat.completions.create = AsyncMock(
        return_value=_DummyResponse("fresh-async")
    )

    resolver_results = [
        ("openai-codex", failing_client, codex_model),
        ("openai-codex", fresh_client, codex_model),
    ]

    with patch(
        "agent.auxiliary_client.resolve_vision_provider_client",
        side_effect=resolver_results,
    ), patch(
        "agent.auxiliary_client._refresh_provider_credentials",
        return_value=True,
    ) as refresh_spy:
        reply = await async_call_llm(
            task="vision",
            provider="openai-codex",
            model=codex_model,
            messages=[{"role": "user", "content": "hi"}],
        )

    assert reply.choices[0].message.content == "fresh-async"
    refresh_spy.assert_called_once_with("openai-codex")
def test_refresh_provider_credentials_force_refreshes_anthropic_oauth_and_evicts_cache(self, monkeypatch):
    """Refreshing ``anthropic`` renews the OAuth token and closes the cached client.

    With every Anthropic env credential blanked and the stored Claude Code
    credentials reporting ``expiresAt: 0``, the refresh helper must be called
    with the stored refresh token, the new credentials must be written back,
    and the client held in the cache must be closed.
    """
    cached_client = MagicMock()
    anthropic_key = ("anthropic", False, None, None, None)
    seeded_cache = {
        anthropic_key: (cached_client, "claude-haiku-4-5-20251001", None),
    }
    stored_credentials = {
        "accessToken": "expired-token",
        "refreshToken": "refresh-token",
        "expiresAt": 0,
    }
    refreshed_credentials = {
        "access_token": "fresh-token",
        "refresh_token": "refresh-token-2",
        "expires_at_ms": 9999999999999,
    }

    for env_name in ("ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN", "ANTHROPIC_API_KEY"):
        monkeypatch.setenv(env_name, "")

    with patch(
        "agent.auxiliary_client._client_cache", seeded_cache
    ), patch(
        "agent.anthropic_adapter.read_claude_code_credentials",
        return_value=stored_credentials,
    ), patch(
        "agent.anthropic_adapter.refresh_anthropic_oauth_pure",
        return_value=refreshed_credentials,
    ) as oauth_spy, patch(
        "agent.anthropic_adapter._write_claude_code_credentials"
    ) as write_spy:
        from agent.auxiliary_client import _refresh_provider_credentials

        assert _refresh_provider_credentials("anthropic") is True

    oauth_spy.assert_called_once_with("refresh-token", use_json=False)
    write_spy.assert_called_once_with("fresh-token", "refresh-token-2", 9999999999999)
    cached_client.close.assert_called_once()
@pytest.mark.asyncio
async def test_async_call_llm_refreshes_anthropic_on_401_for_non_vision(self):
    """Async non-vision task: a 401 from Anthropic triggers one refresh and one retry.

    The cached-client lookup yields the stale client first and the fresh
    client second; each must be awaited exactly once.
    """
    anthropic_url = "https://api.anthropic.com"
    haiku_model = "claude-haiku-4-5-20251001"

    stale_client = MagicMock()
    stale_client.base_url = anthropic_url
    stale_client.chat.completions.create = AsyncMock(
        side_effect=_AuxAuth401("anthropic token expired")
    )

    fresh_client = MagicMock()
    fresh_client.base_url = anthropic_url
    fresh_client.chat.completions.create = AsyncMock(
        return_value=_DummyResponse("fresh-async-anthropic")
    )

    with patch(
        "agent.auxiliary_client._resolve_task_provider_model",
        return_value=("anthropic", haiku_model, None, None, None),
    ), patch(
        "agent.auxiliary_client._get_cached_client",
        side_effect=[(stale_client, haiku_model), (fresh_client, haiku_model)],
    ), patch(
        "agent.auxiliary_client._refresh_provider_credentials",
        return_value=True,
    ) as mock_refresh:
        resp = await async_call_llm(
            task="compression",
            provider="anthropic",
            model=haiku_model,
            messages=[{"role": "user", "content": "hi"}],
        )

    assert resp.choices[0].message.content == "fresh-async-anthropic"
    mock_refresh.assert_called_once_with("anthropic")
    assert stale_client.chat.completions.create.await_count == 1
    assert fresh_client.chat.completions.create.await_count == 1