mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
fix(auth): honor NOUS_INFERENCE_BASE_URL env override for Nous OAuth sessions (#52270)
The host-allowlist hardening (#30611) plus the refresh heal (#49735) left the documented NOUS_INFERENCE_BASE_URL dev/staging escape hatch unreachable for OAuth sessions, despite three code comments asserting it still works. Root cause — resolution precedence in resolve_nous_runtime_credentials: inference_base_url = ( _optional_base_url(state.get("inference_base_url")) # stored — wins or os.getenv("NOUS_INFERENCE_BASE_URL") # env — unreachable or DEFAULT_NOUS_INFERENCE_URL ) A staging OAuth login persists its inference_base_url, but the allowlist rejects the staging host and the refresh heal rewrites the stored value to the production default. The stored (now prod) value is then read BEFORE the env var, so the override never takes effect — every request 401s against prod or is pinned to prod, and setting the env var does nothing. Fix: the user-set env override is the most-trusted source, so consult it FIRST for the URL used to build the client / returned to callers — while keeping the PERSISTED value the validated, network-provenance one (the override is a runtime overlay, never written to auth.json, so unsetting it cleanly reverts to prod). Applied at both chokepoints: - resolve_nous_runtime_credentials (no-refresh read path AND refresh path) - the nous_portal proxy adapter, which re-validates the resolver's returned base_url against the prod allowlist as defense-in-depth and would otherwise reject a legitimate staging override at the forward boundary. New _nous_inference_env_override() / split of stored-vs-effective URL keep the threat model intact: Portal-returned URLs are still allowlist-validated at every network site, and the env path stays ungated (trusted OS user). Also folds in the no-refresh read-path heal (supersedes the approach in the open #50265): a poisoned stored staging host now heals to the prod default on read even when no refresh fires. Tests: TestEnvOverrideWins (env wins on read + refresh paths; override never persisted; poisoned stored heals) and TestProxyAdapterEnvOverride. Verified the 4 behavioral tests fail against pre-fix code and pass with the fix; full inference-validation + nous-provider suites green (85 passed). E2E-validated against a real temp HERMES_HOME exercising the real resolver + proxy adapter: resolver→staging, persisted→prod, proxy→staging, unset→reverts to prod.
This commit is contained in:
parent
d6cf383d74
commit
736e981abf
4 changed files with 216 additions and 11 deletions
|
|
@ -1731,6 +1731,19 @@ def _validate_nous_inference_url_from_network(url: Optional[str]) -> Optional[st
|
|||
return cleaned.rstrip("/")
|
||||
|
||||
|
||||
def _nous_inference_env_override() -> Optional[str]:
|
||||
"""Return the user-set ``NOUS_INFERENCE_BASE_URL`` override, if any.
|
||||
|
||||
This is the documented dev/staging escape hatch. The env source is
|
||||
trusted (the OS user set it themselves), so it is intentionally NOT
|
||||
gated by the network host allowlist — unlike Portal-returned URLs.
|
||||
|
||||
Returns a trailing-slash-stripped non-empty string, or ``None`` when
|
||||
the env var is unset/blank.
|
||||
"""
|
||||
return _optional_base_url(os.getenv("NOUS_INFERENCE_BASE_URL"))
|
||||
|
||||
|
||||
def _decode_jwt_claims(token: Any) -> Dict[str, Any]:
|
||||
if not isinstance(token, str) or token.count(".") != 2:
|
||||
return {}
|
||||
|
|
@ -5507,11 +5520,24 @@ def resolve_nous_runtime_credentials(
|
|||
or os.getenv("NOUS_PORTAL_BASE_URL")
|
||||
or DEFAULT_NOUS_PORTAL_URL
|
||||
).rstrip("/")
|
||||
inference_base_url = (
|
||||
_optional_base_url(state.get("inference_base_url"))
|
||||
or os.getenv("NOUS_INFERENCE_BASE_URL")
|
||||
# Persisted value: validated network-provenance only. The stored
|
||||
# inference_base_url is re-validated on read so a poisoned/stale
|
||||
# staging host (persisted before the allowlist existed) heals to the
|
||||
# production default on the no-refresh read path — this is what gets
|
||||
# written back to auth.json. The env override is deliberately NOT
|
||||
# folded in here: it must never be persisted (it's a runtime overlay).
|
||||
stored_inference_base_url = (
|
||||
_validate_nous_inference_url_from_network(
|
||||
_optional_base_url(state.get("inference_base_url"))
|
||||
)
|
||||
or DEFAULT_NOUS_INFERENCE_URL
|
||||
).rstrip("/")
|
||||
)
|
||||
# Effective value used to build the client / returned to callers:
|
||||
# the NOUS_INFERENCE_BASE_URL env override wins (documented dev/staging
|
||||
# escape hatch), else the validated stored value.
|
||||
inference_base_url = (
|
||||
_nous_inference_env_override() or stored_inference_base_url
|
||||
)
|
||||
client_id = str(state.get("client_id") or DEFAULT_NOUS_CLIENT_ID)
|
||||
|
||||
def _persist_state(reason: str) -> None:
|
||||
|
|
@ -5635,10 +5661,15 @@ def resolve_nous_runtime_credentials(
|
|||
# Heal a poisoned stored value (see refresh_nous_oauth_pure):
|
||||
# reject → reset to production default, don't keep a stale
|
||||
# staging host that re-validates to None every refresh.
|
||||
# The local inference_base_url is persisted to state below
|
||||
# (and used for the client), so healing it here suffices.
|
||||
# This (validated, network-provenance) value is what gets
|
||||
# persisted to auth.json below. The NOUS_INFERENCE_BASE_URL
|
||||
# env override is layered on for the client/return value
|
||||
# only (see below) — it is never persisted.
|
||||
refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
|
||||
inference_base_url = refreshed_url or DEFAULT_NOUS_INFERENCE_URL
|
||||
stored_inference_base_url = refreshed_url or DEFAULT_NOUS_INFERENCE_URL
|
||||
inference_base_url = (
|
||||
_nous_inference_env_override() or stored_inference_base_url
|
||||
)
|
||||
state["obtained_at"] = now.isoformat()
|
||||
state["expires_in"] = access_ttl
|
||||
state["expires_at"] = datetime.fromtimestamp(
|
||||
|
|
@ -5667,8 +5698,11 @@ def resolve_nous_runtime_credentials(
|
|||
)
|
||||
|
||||
# Persist routing and TLS metadata for non-interactive refresh.
|
||||
# Persist the validated, network-provenance URL — NEVER the env
|
||||
# override (which is a runtime-only overlay; persisting it would
|
||||
# leak a dev/staging host into auth.json and survive unsetting it).
|
||||
state["portal_base_url"] = portal_base_url
|
||||
state["inference_base_url"] = inference_base_url
|
||||
state["inference_base_url"] = stored_inference_base_url
|
||||
state["client_id"] = client_id
|
||||
state["tls"] = {
|
||||
"insecure": verify is False,
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from hermes_cli.auth import (
|
|||
_load_auth_store,
|
||||
_auth_store_lock,
|
||||
_is_terminal_nous_refresh_error,
|
||||
_nous_inference_env_override,
|
||||
_quarantine_nous_oauth_state,
|
||||
_quarantine_nous_pool_entries,
|
||||
_save_auth_store,
|
||||
|
|
@ -132,8 +133,17 @@ class NousPortalAdapter(UpstreamAdapter):
|
|||
"Try `hermes auth add nous` to re-authenticate."
|
||||
)
|
||||
|
||||
# base_url returned by resolve_nous_runtime_credentials() already
|
||||
# honors the NOUS_INFERENCE_BASE_URL env override (the documented
|
||||
# dev/staging escape hatch). Re-validating it here against the prod
|
||||
# host allowlist would wrongly reject a legitimate staging override,
|
||||
# so layer the same env-first overlay on top of the network-validated
|
||||
# value: env override wins, else validate the returned URL, else
|
||||
# fall back to the production default (defense-in-depth for a future
|
||||
# source-layer bypass).
|
||||
base_url = (
|
||||
_validate_nous_inference_url_from_network(refreshed.get("base_url"))
|
||||
_nous_inference_env_override()
|
||||
or _validate_nous_inference_url_from_network(refreshed.get("base_url"))
|
||||
or DEFAULT_NOUS_INFERENCE_URL
|
||||
)
|
||||
base_url = base_url.rstrip("/")
|
||||
|
|
|
|||
|
|
@ -238,8 +238,8 @@ def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
|
|||
"active_provider": "nous",
|
||||
"providers": {
|
||||
"nous": {
|
||||
"portal_base_url": "https://portal.example.com",
|
||||
"inference_base_url": "https://inference.example.com/v1",
|
||||
"portal_base_url": "https://portal.nousresearch.com",
|
||||
"inference_base_url": "https://inference-api.nousresearch.com/v1",
|
||||
"client_id": "hermes-cli",
|
||||
"token_type": "Bearer",
|
||||
"scope": auth_mod.DEFAULT_NOUS_SCOPE,
|
||||
|
|
|
|||
|
|
@ -291,3 +291,164 @@ class TestHealsPoisonedStoredValue:
|
|||
|
||||
result = auth.refresh_nous_oauth_from_state(state, force_refresh=True)
|
||||
assert result["inference_base_url"] == good
|
||||
|
||||
|
||||
class TestEnvOverrideWins:
|
||||
"""``NOUS_INFERENCE_BASE_URL`` must win over the stored value for the
|
||||
URL used to build the inference client / returned to callers.
|
||||
|
||||
This is the documented dev/staging escape hatch. The breakage it
|
||||
regresses against: the security allowlist (#30611) plus the refresh
|
||||
heal (#49735) mean a staging login's stored ``inference_base_url`` is
|
||||
rejected and rewritten to the production default, and the runtime
|
||||
resolver previously read that stored (prod) value *before* the env
|
||||
var — so an OAuth user could not reach staging at all, even with the
|
||||
env override set. The override is consulted FIRST here, while the
|
||||
PERSISTED value stays the validated, network-provenance one (the env
|
||||
override is a runtime overlay, never written to auth.json).
|
||||
"""
|
||||
|
||||
STAGING = "https://stg-inference-api.nousresearch.com/v1"
|
||||
|
||||
def _patch_no_refresh(self, monkeypatch, auth, state):
|
||||
import contextlib
|
||||
|
||||
# No refresh fires: the stored access token is a usable invoke JWT.
|
||||
monkeypatch.setattr(auth, "_nous_invoke_jwt_status", lambda *a, **k: None)
|
||||
monkeypatch.setattr(
|
||||
auth, "_auth_store_lock", lambda *a, **k: contextlib.nullcontext()
|
||||
)
|
||||
monkeypatch.setattr(auth, "_load_auth_store", lambda *a, **k: {})
|
||||
monkeypatch.setattr(auth, "_load_provider_state", lambda store, pid: state)
|
||||
monkeypatch.setattr(auth, "_save_provider_state", lambda *a, **k: None)
|
||||
monkeypatch.setattr(auth, "_save_auth_store", lambda *a, **k: None)
|
||||
monkeypatch.setattr(auth, "_write_shared_nous_state", lambda *a, **k: None)
|
||||
monkeypatch.setattr(auth, "_sync_nous_pool_from_auth_store", lambda *a, **k: None)
|
||||
monkeypatch.setattr(auth, "_resolve_verify", lambda *a, **k: True)
|
||||
monkeypatch.setattr(auth, "_assert_nous_inference_jwt_usable", lambda *a, **k: None)
|
||||
monkeypatch.setattr(auth, "_select_nous_invoke_jwt", lambda *a, **k: None)
|
||||
|
||||
def _base_state(self, auth, stored):
|
||||
return {
|
||||
"access_token": "tok",
|
||||
"refresh_token": "rtok",
|
||||
"client_id": "hermes-cli",
|
||||
"portal_base_url": auth.DEFAULT_NOUS_PORTAL_URL,
|
||||
"inference_base_url": stored,
|
||||
"agent_key": "ak-123",
|
||||
}
|
||||
|
||||
def test_no_refresh_env_override_wins_over_prod_stored(self, monkeypatch):
|
||||
"""The exact regression: a prod-pinned stored value (the state a
|
||||
staging login lands in after the heal) must NOT shadow the env
|
||||
override on the steady-state read path."""
|
||||
import hermes_cli.auth as auth
|
||||
|
||||
state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL)
|
||||
self._patch_no_refresh(monkeypatch, auth, state)
|
||||
monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", self.STAGING)
|
||||
|
||||
result = auth.resolve_nous_runtime_credentials()
|
||||
|
||||
assert result["base_url"] == self.STAGING, (
|
||||
"env override must win over the stored production URL on the "
|
||||
f"no-refresh read path, got {result['base_url']!r}"
|
||||
)
|
||||
|
||||
def test_no_refresh_env_override_not_persisted(self, monkeypatch):
|
||||
"""The env override is a runtime overlay: it must never be written
|
||||
back into the stored state (auth.json)."""
|
||||
import hermes_cli.auth as auth
|
||||
|
||||
state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL)
|
||||
self._patch_no_refresh(monkeypatch, auth, state)
|
||||
monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", self.STAGING)
|
||||
|
||||
auth.resolve_nous_runtime_credentials()
|
||||
|
||||
assert state["inference_base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL, (
|
||||
"env override leaked into persisted state — it must stay a "
|
||||
f"runtime overlay, got {state['inference_base_url']!r}"
|
||||
)
|
||||
|
||||
def test_no_refresh_no_env_uses_stored_default(self, monkeypatch):
|
||||
"""With no env override, the validated stored value is used."""
|
||||
import hermes_cli.auth as auth
|
||||
|
||||
state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL)
|
||||
self._patch_no_refresh(monkeypatch, auth, state)
|
||||
monkeypatch.delenv("NOUS_INFERENCE_BASE_URL", raising=False)
|
||||
|
||||
result = auth.resolve_nous_runtime_credentials()
|
||||
assert result["base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL
|
||||
|
||||
def test_no_refresh_heals_poisoned_stored_without_env(self, monkeypatch):
|
||||
"""A poisoned stored staging host (persisted before the allowlist)
|
||||
still heals to the default when no env override is present — the
|
||||
#50265 no-refresh-read-path heal, folded in here."""
|
||||
import hermes_cli.auth as auth
|
||||
|
||||
state = self._base_state(auth, self.STAGING)
|
||||
self._patch_no_refresh(monkeypatch, auth, state)
|
||||
monkeypatch.delenv("NOUS_INFERENCE_BASE_URL", raising=False)
|
||||
|
||||
result = auth.resolve_nous_runtime_credentials()
|
||||
assert result["base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL, (
|
||||
"poisoned stored URL must heal to the production default on the "
|
||||
f"no-refresh read path, got {result['base_url']!r}"
|
||||
)
|
||||
|
||||
def test_refresh_env_override_wins_but_persists_validated(self, monkeypatch):
|
||||
"""On the refresh path: env override is used for the returned/client
|
||||
URL, but the PERSISTED stored value is the validated network one
|
||||
(production default when the Portal hands back a rejected host)."""
|
||||
import hermes_cli.auth as auth
|
||||
|
||||
state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL)
|
||||
self._patch_no_refresh(monkeypatch, auth, state)
|
||||
# Force the refresh branch; Portal hands back a (rejected) staging host.
|
||||
monkeypatch.setattr(auth, "_nous_invoke_jwt_status", lambda *a, **k: "needs_refresh")
|
||||
monkeypatch.setattr(
|
||||
auth,
|
||||
"_refresh_access_token",
|
||||
lambda **k: {
|
||||
"access_token": "newtok",
|
||||
"refresh_token": "newrtok",
|
||||
"expires_in": 3600,
|
||||
"inference_base_url": self.STAGING,
|
||||
},
|
||||
)
|
||||
monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", self.STAGING)
|
||||
|
||||
result = auth.resolve_nous_runtime_credentials(force_refresh=True)
|
||||
|
||||
assert result["base_url"] == self.STAGING, (
|
||||
"env override must win for the returned URL on the refresh path"
|
||||
)
|
||||
assert state["inference_base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL, (
|
||||
"refresh path must persist the validated network value (prod "
|
||||
f"default), not the env override, got {state['inference_base_url']!r}"
|
||||
)
|
||||
|
||||
|
||||
class TestProxyAdapterEnvOverride:
|
||||
"""The Nous proxy adapter is the second chokepoint: it re-validates the
|
||||
base_url returned by resolve_nous_runtime_credentials() against the prod
|
||||
allowlist. That re-validation must not clobber a legitimate
|
||||
NOUS_INFERENCE_BASE_URL staging override.
|
||||
"""
|
||||
|
||||
def test_proxy_adapter_consults_env_override(self):
|
||||
"""Grep contract: the proxy adapter's forward-boundary base_url
|
||||
resolution consults the env override before the network validator,
|
||||
so a staging override survives the defense-in-depth re-validation."""
|
||||
from pathlib import Path
|
||||
import hermes_cli.proxy.adapters.nous_portal as _nous_adapter
|
||||
|
||||
source = Path(_nous_adapter.__file__).read_text(encoding="utf-8")
|
||||
assert "_nous_inference_env_override()" in source, (
|
||||
"proxy adapter must layer the env override on top of the network "
|
||||
"validator, else a staging override is rejected at the forward boundary"
|
||||
)
|
||||
# The validator must still be present (defense-in-depth preserved).
|
||||
assert "_validate_nous_inference_url_from_network" in source
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue