diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 4271ec20417..4a0571a180b 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -1731,6 +1731,19 @@ def _validate_nous_inference_url_from_network(url: Optional[str]) -> Optional[st return cleaned.rstrip("/") +def _nous_inference_env_override() -> Optional[str]: + """Return the user-set ``NOUS_INFERENCE_BASE_URL`` override, if any. + + This is the documented dev/staging escape hatch. The env source is + trusted (the OS user set it themselves), so it is intentionally NOT + gated by the network host allowlist — unlike Portal-returned URLs. + + Returns a trailing-slash-stripped non-empty string, or ``None`` when + the env var is unset/blank. + """ + return _optional_base_url(os.getenv("NOUS_INFERENCE_BASE_URL")) + + def _decode_jwt_claims(token: Any) -> Dict[str, Any]: if not isinstance(token, str) or token.count(".") != 2: return {} @@ -5507,11 +5520,24 @@ def resolve_nous_runtime_credentials( or os.getenv("NOUS_PORTAL_BASE_URL") or DEFAULT_NOUS_PORTAL_URL ).rstrip("/") - inference_base_url = ( - _optional_base_url(state.get("inference_base_url")) - or os.getenv("NOUS_INFERENCE_BASE_URL") + # Persisted value: validated network-provenance only. The stored + # inference_base_url is re-validated on read so a poisoned/stale + # staging host (persisted before the allowlist existed) heals to the + # production default on the no-refresh read path — this is what gets + # written back to auth.json. The env override is deliberately NOT + # folded in here: it must never be persisted (it's a runtime overlay). + stored_inference_base_url = ( + _validate_nous_inference_url_from_network( + _optional_base_url(state.get("inference_base_url")) + ) or DEFAULT_NOUS_INFERENCE_URL - ).rstrip("/") + ) + # Effective value used to build the client / returned to callers: + # the NOUS_INFERENCE_BASE_URL env override wins (documented dev/staging + # escape hatch), else the validated stored value. 
+ inference_base_url = ( + _nous_inference_env_override() or stored_inference_base_url + ) client_id = str(state.get("client_id") or DEFAULT_NOUS_CLIENT_ID) def _persist_state(reason: str) -> None: @@ -5635,10 +5661,15 @@ def resolve_nous_runtime_credentials( # Heal a poisoned stored value (see refresh_nous_oauth_pure): # reject → reset to production default, don't keep a stale # staging host that re-validates to None every refresh. - # The local inference_base_url is persisted to state below - # (and used for the client), so healing it here suffices. + # This (validated, network-provenance) value is what gets + # persisted to auth.json below. The NOUS_INFERENCE_BASE_URL + # env override is layered on for the client/return value + # only (see below) — it is never persisted. refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url")) - inference_base_url = refreshed_url or DEFAULT_NOUS_INFERENCE_URL + stored_inference_base_url = refreshed_url or DEFAULT_NOUS_INFERENCE_URL + inference_base_url = ( + _nous_inference_env_override() or stored_inference_base_url + ) state["obtained_at"] = now.isoformat() state["expires_in"] = access_ttl state["expires_at"] = datetime.fromtimestamp( @@ -5667,8 +5698,11 @@ def resolve_nous_runtime_credentials( ) # Persist routing and TLS metadata for non-interactive refresh. + # Persist the validated, network-provenance URL — NEVER the env + # override (which is a runtime-only overlay; persisting it would + # leak a dev/staging host into auth.json and survive unsetting it). 
state["portal_base_url"] = portal_base_url - state["inference_base_url"] = inference_base_url + state["inference_base_url"] = stored_inference_base_url state["client_id"] = client_id state["tls"] = { "insecure": verify is False, diff --git a/hermes_cli/proxy/adapters/nous_portal.py b/hermes_cli/proxy/adapters/nous_portal.py index 4759d8dd22b..18c0123a25b 100644 --- a/hermes_cli/proxy/adapters/nous_portal.py +++ b/hermes_cli/proxy/adapters/nous_portal.py @@ -17,6 +17,7 @@ from hermes_cli.auth import ( _load_auth_store, _auth_store_lock, _is_terminal_nous_refresh_error, + _nous_inference_env_override, _quarantine_nous_oauth_state, _quarantine_nous_pool_entries, _save_auth_store, @@ -132,8 +133,17 @@ class NousPortalAdapter(UpstreamAdapter): "Try `hermes auth add nous` to re-authenticate." ) + # base_url returned by resolve_nous_runtime_credentials() already + # honors the NOUS_INFERENCE_BASE_URL env override (the documented + # dev/staging escape hatch). Re-validating it here against the prod + # host allowlist would wrongly reject a legitimate staging override, + # so layer the same env-first overlay on top of the network-validated + # value: env override wins, else validate the returned URL, else + # fall back to the production default (defense-in-depth for a future + # source-layer bypass). 
base_url = ( - _validate_nous_inference_url_from_network(refreshed.get("base_url")) + _nous_inference_env_override() + or _validate_nous_inference_url_from_network(refreshed.get("base_url")) or DEFAULT_NOUS_INFERENCE_URL ) base_url = base_url.rstrip("/") diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py index 32b175a5b12..53812f4e718 100644 --- a/tests/hermes_cli/test_auth_nous_provider.py +++ b/tests/hermes_cli/test_auth_nous_provider.py @@ -238,8 +238,8 @@ def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent( "active_provider": "nous", "providers": { "nous": { - "portal_base_url": "https://portal.example.com", - "inference_base_url": "https://inference.example.com/v1", + "portal_base_url": "https://portal.nousresearch.com", + "inference_base_url": "https://inference-api.nousresearch.com/v1", "client_id": "hermes-cli", "token_type": "Bearer", "scope": auth_mod.DEFAULT_NOUS_SCOPE, diff --git a/tests/hermes_cli/test_nous_inference_url_validation.py b/tests/hermes_cli/test_nous_inference_url_validation.py index 193a342cff2..3aa3dc2d563 100644 --- a/tests/hermes_cli/test_nous_inference_url_validation.py +++ b/tests/hermes_cli/test_nous_inference_url_validation.py @@ -291,3 +291,164 @@ class TestHealsPoisonedStoredValue: result = auth.refresh_nous_oauth_from_state(state, force_refresh=True) assert result["inference_base_url"] == good + + +class TestEnvOverrideWins: + """``NOUS_INFERENCE_BASE_URL`` must win over the stored value for the + URL used to build the inference client / returned to callers. + + This is the documented dev/staging escape hatch. 
The breakage it + guards against: the security allowlist (#30611) plus the refresh + heal (#49735) mean a staging login's stored ``inference_base_url`` is + rejected and rewritten to the production default, and the runtime + resolver previously read that stored (prod) value *before* the env + var — so an OAuth user could not reach staging at all, even with the + env override set. The override is consulted FIRST here, while the + PERSISTED value stays the validated, network-provenance one (the env + override is a runtime overlay, never written to auth.json). + """ + + STAGING = "https://stg-inference-api.nousresearch.com/v1" + + def _patch_no_refresh(self, monkeypatch, auth, state): + import contextlib + + # No refresh fires: the stored access token is a usable invoke JWT. + monkeypatch.setattr(auth, "_nous_invoke_jwt_status", lambda *a, **k: None) + monkeypatch.setattr( + auth, "_auth_store_lock", lambda *a, **k: contextlib.nullcontext() + ) + monkeypatch.setattr(auth, "_load_auth_store", lambda *a, **k: {}) + monkeypatch.setattr(auth, "_load_provider_state", lambda store, pid: state) + monkeypatch.setattr(auth, "_save_provider_state", lambda *a, **k: None) + monkeypatch.setattr(auth, "_save_auth_store", lambda *a, **k: None) + monkeypatch.setattr(auth, "_write_shared_nous_state", lambda *a, **k: None) + monkeypatch.setattr(auth, "_sync_nous_pool_from_auth_store", lambda *a, **k: None) + monkeypatch.setattr(auth, "_resolve_verify", lambda *a, **k: True) + monkeypatch.setattr(auth, "_assert_nous_inference_jwt_usable", lambda *a, **k: None) + monkeypatch.setattr(auth, "_select_nous_invoke_jwt", lambda *a, **k: None) + + def _base_state(self, auth, stored): + return { + "access_token": "tok", + "refresh_token": "rtok", + "client_id": "hermes-cli", + "portal_base_url": auth.DEFAULT_NOUS_PORTAL_URL, + "inference_base_url": stored, + "agent_key": "ak-123", + } + + def test_no_refresh_env_override_wins_over_prod_stored(self, monkeypatch): + """The exact regression: a 
prod-pinned stored value (the state a + staging login lands in after the heal) must NOT shadow the env + override on the steady-state read path.""" + import hermes_cli.auth as auth + + state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL) + self._patch_no_refresh(monkeypatch, auth, state) + monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", self.STAGING) + + result = auth.resolve_nous_runtime_credentials() + + assert result["base_url"] == self.STAGING, ( + "env override must win over the stored production URL on the " + f"no-refresh read path, got {result['base_url']!r}" + ) + + def test_no_refresh_env_override_not_persisted(self, monkeypatch): + """The env override is a runtime overlay: it must never be written + back into the stored state (auth.json).""" + import hermes_cli.auth as auth + + state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL) + self._patch_no_refresh(monkeypatch, auth, state) + monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", self.STAGING) + + auth.resolve_nous_runtime_credentials() + + assert state["inference_base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL, ( + "env override leaked into persisted state — it must stay a " + f"runtime overlay, got {state['inference_base_url']!r}" + ) + + def test_no_refresh_no_env_uses_stored_default(self, monkeypatch): + """With no env override, the validated stored value is used.""" + import hermes_cli.auth as auth + + state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL) + self._patch_no_refresh(monkeypatch, auth, state) + monkeypatch.delenv("NOUS_INFERENCE_BASE_URL", raising=False) + + result = auth.resolve_nous_runtime_credentials() + assert result["base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL + + def test_no_refresh_heals_poisoned_stored_without_env(self, monkeypatch): + """A poisoned stored staging host (persisted before the allowlist) + still heals to the default when no env override is present — the + #50265 no-refresh-read-path heal, folded in here.""" + import hermes_cli.auth as 
auth + + state = self._base_state(auth, self.STAGING) + self._patch_no_refresh(monkeypatch, auth, state) + monkeypatch.delenv("NOUS_INFERENCE_BASE_URL", raising=False) + + result = auth.resolve_nous_runtime_credentials() + assert result["base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL, ( + "poisoned stored URL must heal to the production default on the " + f"no-refresh read path, got {result['base_url']!r}" + ) + + def test_refresh_env_override_wins_but_persists_validated(self, monkeypatch): + """On the refresh path: env override is used for the returned/client + URL, but the PERSISTED stored value is the validated network one + (production default when the Portal hands back a rejected host).""" + import hermes_cli.auth as auth + + state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL) + self._patch_no_refresh(monkeypatch, auth, state) + # Force the refresh branch; Portal hands back a (rejected) staging host. + monkeypatch.setattr(auth, "_nous_invoke_jwt_status", lambda *a, **k: "needs_refresh") + monkeypatch.setattr( + auth, + "_refresh_access_token", + lambda **k: { + "access_token": "newtok", + "refresh_token": "newrtok", + "expires_in": 3600, + "inference_base_url": self.STAGING, + }, + ) + monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", self.STAGING) + + result = auth.resolve_nous_runtime_credentials(force_refresh=True) + + assert result["base_url"] == self.STAGING, ( + "env override must win for the returned URL on the refresh path" + ) + assert state["inference_base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL, ( + "refresh path must persist the validated network value (prod " + f"default), not the env override, got {state['inference_base_url']!r}" + ) + + +class TestProxyAdapterEnvOverride: + """The Nous proxy adapter is the second chokepoint: it re-validates the + base_url returned by resolve_nous_runtime_credentials() against the prod + allowlist. That re-validation must not clobber a legitimate + NOUS_INFERENCE_BASE_URL staging override. 
+ """ + + def test_proxy_adapter_consults_env_override(self): + """Grep contract: the adapter source must call the env override and + still reference the network validator (defense-in-depth). Substring + check only: it does not prove the override is consulted first.""" + from pathlib import Path + import hermes_cli.proxy.adapters.nous_portal as _nous_adapter + + source = Path(_nous_adapter.__file__).read_text(encoding="utf-8") + assert "_nous_inference_env_override()" in source, ( + "proxy adapter must layer the env override on top of the network " + "validator, else a staging override is rejected at the forward boundary" + ) + # The validator must still be present (defense-in-depth preserved). + assert "_validate_nous_inference_url_from_network" in source