diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 13efa8db1..ba9aafc74 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -465,9 +465,44 @@ def _read_main_model() -> str: return "" +def _resolve_custom_runtime() -> Tuple[Optional[str], Optional[str]]: + """Resolve the active custom/main endpoint the same way the main CLI does. + + This covers both env-driven OPENAI_BASE_URL setups and config-saved custom + endpoints where the base URL lives in config.yaml instead of the live + environment. + """ + try: + from hermes_cli.runtime_provider import resolve_runtime_provider + + runtime = resolve_runtime_provider(requested="custom") + except Exception as exc: + logger.debug("Auxiliary client: custom runtime resolution failed: %s", exc) + return None, None + + custom_base = runtime.get("base_url") + custom_key = runtime.get("api_key") + if not isinstance(custom_base, str) or not custom_base.strip(): + return None, None + if not isinstance(custom_key, str) or not custom_key.strip(): + return None, None + + custom_base = custom_base.strip().rstrip("/") + if "openrouter.ai" in custom_base.lower(): + # requested='custom' falls back to OpenRouter when no custom endpoint is + # configured. Treat that as "no custom endpoint" for auxiliary routing. + return None, None + + return custom_base, custom_key.strip() + + +def _current_custom_base_url() -> str: + custom_base, _ = _resolve_custom_runtime() + return custom_base or "" + + def _try_custom_endpoint() -> Tuple[Optional[OpenAI], Optional[str]]: - custom_base = os.getenv("OPENAI_BASE_URL") - custom_key = os.getenv("OPENAI_API_KEY") + custom_base, custom_key = _resolve_custom_runtime() if not custom_base or not custom_key: return None, None model = _read_main_model() or "gpt-4o-mini" @@ -829,7 +864,7 @@ def auxiliary_max_tokens_param(value: int) -> dict: The Codex adapter translates max_tokens internally, so we use max_tokens for it as well. """ - custom_base = os.getenv("OPENAI_BASE_URL", "") + custom_base = _current_custom_base_url() or_key = os.getenv("OPENROUTER_API_KEY") # Only use max_completion_tokens for direct OpenAI custom endpoints if (not or_key @@ -950,7 +985,7 @@ def _build_call_kwargs( # Codex adapter handles max_tokens internally; OpenRouter/Nous use max_tokens. # Direct OpenAI api.openai.com with newer models needs max_completion_tokens. if provider == "custom": - custom_base = os.getenv("OPENAI_BASE_URL", "") + custom_base = _current_custom_base_url() if "api.openai.com" in custom_base.lower(): kwargs["max_completion_tokens"] = max_tokens else: diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index fead68000..e0535357a 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -144,10 +144,16 @@ def _resolve_openrouter_runtime( env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip() use_config_base_url = False - if requested_norm == "auto": - if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url: + if cfg_base_url.strip() and not explicit_base_url and not env_openai_base_url: + if requested_norm == "auto": if not cfg_provider or cfg_provider == "auto": use_config_base_url = True + elif requested_norm == "custom": + # Persisted custom endpoints store their base URL in config.yaml. + # If OPENAI_BASE_URL is not currently set in the environment, keep + # honoring that saved endpoint instead of falling back to OpenRouter. + if cfg_provider == "custom": + use_config_base_url = True # When the user explicitly requested the openrouter provider, skip # OPENAI_BASE_URL — it typically points to a custom / non-OpenRouter diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py index 57c73eb8b..d9e07bc6f 100644 --- a/tests/agent/test_auxiliary_client.py +++ b/tests/agent/test_auxiliary_client.py @@ -142,6 +142,29 @@ class TestGetTextAuxiliaryClient: call_kwargs = mock_openai.call_args assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1" + def test_custom_endpoint_uses_config_saved_base_url(self, monkeypatch): + config = { + "model": { + "provider": "custom", + "base_url": "http://localhost:1234/v1", + "default": "my-local-model", + } + } + monkeypatch.setenv("OPENAI_API_KEY", "lm-studio-key") + monkeypatch.setattr("hermes_cli.config.load_config", lambda: config) + monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config) + + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \ + patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = get_text_auxiliary_client() + + assert client is not None + assert model == "my-local-model" + call_kwargs = mock_openai.call_args + assert call_kwargs.kwargs["base_url"] == "http://localhost:1234/v1" + def test_codex_fallback_when_nothing_else(self, codex_auth_dir): with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ patch("agent.auxiliary_client.OpenAI") as mock_openai: @@ -320,6 +343,27 @@ class TestResolveForcedProvider: client, model = _resolve_forced_provider("main") assert model == "my-local-model" + def test_forced_main_uses_config_saved_custom_endpoint(self, monkeypatch): + config = { + "model": { + "provider": "custom", + "base_url": "http://local:8080/v1", + "default": "my-local-model", + } + } + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + monkeypatch.setattr("hermes_cli.config.load_config", lambda: config) + monkeypatch.setattr("hermes_cli.runtime_provider.load_config", lambda: config) + with patch("agent.auxiliary_client._read_nous_auth", return_value=None), \ + patch("agent.auxiliary_client._read_codex_access_token", return_value=None), \ + patch("agent.auxiliary_client._resolve_api_key_provider", return_value=(None, None)), \ + patch("agent.auxiliary_client.OpenAI") as mock_openai: + client, model = _resolve_forced_provider("main") + assert client is not None + assert model == "my-local-model" + call_kwargs = mock_openai.call_args + assert call_kwargs.kwargs["base_url"] == "http://local:8080/v1" + def test_forced_main_skips_openrouter_nous(self, monkeypatch): """Even if OpenRouter key is set, 'main' skips it.""" monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") diff --git a/tests/test_runtime_provider_resolution.py b/tests/test_runtime_provider_resolution.py index a53c716a3..52d4a1d4f 100644 --- a/tests/test_runtime_provider_resolution.py +++ b/tests/test_runtime_provider_resolution.py @@ -131,13 +131,36 @@ def test_custom_endpoint_prefers_openai_key(monkeypatch): monkeypatch.setattr(rp, "_get_model_config", lambda: {}) monkeypatch.setenv("OPENAI_BASE_URL", "https://api.z.ai/api/coding/paas/v4") monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) - monkeypatch.setenv("OPENAI_API_KEY", "sk-zai-correct-key") - monkeypatch.setenv("OPENROUTER_API_KEY", "sk-or-wrong-key-for-zai") + monkeypatch.setenv("OPENAI_API_KEY", "zai-key") + monkeypatch.setenv("OPENROUTER_API_KEY", "openrouter-key") resolved = rp.resolve_runtime_provider(requested="custom") assert resolved["base_url"] == "https://api.z.ai/api/coding/paas/v4" - assert resolved["api_key"] == "sk-zai-correct-key" + assert resolved["api_key"] == "zai-key" + + +def test_custom_endpoint_uses_saved_config_base_url_when_env_missing(monkeypatch): + """Persisted custom endpoints in config.yaml must still resolve when + OPENAI_BASE_URL is absent from the current environment.""" + monkeypatch.setattr(rp, "resolve_provider", lambda *a, **k: "openrouter") + monkeypatch.setattr( + rp, + "_get_model_config", + lambda: { + "provider": "custom", + "base_url": "http://127.0.0.1:1234/v1", + }, + ) + monkeypatch.delenv("OPENAI_BASE_URL", raising=False) + monkeypatch.delenv("OPENROUTER_BASE_URL", raising=False) + monkeypatch.setenv("OPENAI_API_KEY", "local-key") + monkeypatch.setenv("OPENROUTER_API_KEY", "or-key") + + resolved = rp.resolve_runtime_provider(requested="custom") + + assert resolved["base_url"] == "http://127.0.0.1:1234/v1" + assert resolved["api_key"] == "local-key" def test_custom_endpoint_auto_provider_prefers_openai_key(monkeypatch):