diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index 4271ec20417..4a0571a180b 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -1731,6 +1731,19 @@ def _validate_nous_inference_url_from_network(url: Optional[str]) -> Optional[st
     return cleaned.rstrip("/")
 
 
+def _nous_inference_env_override() -> Optional[str]:
+    """Return the user-set ``NOUS_INFERENCE_BASE_URL`` override, if any.
+
+    This is the documented dev/staging escape hatch. The env source is
+    trusted (the OS user set it themselves), so it is intentionally NOT
+    gated by the network host allowlist — unlike Portal-returned URLs.
+
+    Returns a trailing-slash-stripped non-empty string, or ``None`` when
+    the env var is unset/blank.
+    """
+    return _optional_base_url(os.getenv("NOUS_INFERENCE_BASE_URL"))
+
+
 def _decode_jwt_claims(token: Any) -> Dict[str, Any]:
     if not isinstance(token, str) or token.count(".") != 2:
         return {}
@@ -5507,11 +5520,24 @@ def resolve_nous_runtime_credentials(
             or os.getenv("NOUS_PORTAL_BASE_URL")
             or DEFAULT_NOUS_PORTAL_URL
         ).rstrip("/")
-        inference_base_url = (
-            _optional_base_url(state.get("inference_base_url"))
-            or os.getenv("NOUS_INFERENCE_BASE_URL")
+        # Persisted value: validated network-provenance only. The stored
+        # inference_base_url is re-validated on read so a poisoned/stale
+        # staging host (persisted before the allowlist existed) heals to the
+        # production default on the no-refresh read path — this is what gets
+        # written back to auth.json. The env override is deliberately NOT
+        # folded in here: it must never be persisted (it's a runtime overlay).
+        stored_inference_base_url = (
+            _validate_nous_inference_url_from_network(
+                _optional_base_url(state.get("inference_base_url"))
+            )
             or DEFAULT_NOUS_INFERENCE_URL
-        ).rstrip("/")
+        )
+        # Effective value used to build the client / returned to callers:
+        # the NOUS_INFERENCE_BASE_URL env override wins (documented dev/staging
+        # escape hatch), else the validated stored value.
+        inference_base_url = (
+            _nous_inference_env_override() or stored_inference_base_url
+        )
         client_id = str(state.get("client_id") or DEFAULT_NOUS_CLIENT_ID)
 
         def _persist_state(reason: str) -> None:
@@ -5635,10 +5661,15 @@ def resolve_nous_runtime_credentials(
                         # Heal a poisoned stored value (see refresh_nous_oauth_pure):
                         # reject → reset to production default, don't keep a stale
                         # staging host that re-validates to None every refresh.
-                        # The local inference_base_url is persisted to state below
-                        # (and used for the client), so healing it here suffices.
+                        # This (validated, network-provenance) value is what gets
+                        # persisted to auth.json below. The NOUS_INFERENCE_BASE_URL
+                        # env override is layered on for the client/return value
+                        # only (see below) — it is never persisted.
                         refreshed_url = _validate_nous_inference_url_from_network(refreshed.get("inference_base_url"))
-                        inference_base_url = refreshed_url or DEFAULT_NOUS_INFERENCE_URL
+                        stored_inference_base_url = refreshed_url or DEFAULT_NOUS_INFERENCE_URL
+                        inference_base_url = (
+                            _nous_inference_env_override() or stored_inference_base_url
+                        )
                         state["obtained_at"] = now.isoformat()
                         state["expires_in"] = access_ttl
                         state["expires_at"] = datetime.fromtimestamp(
@@ -5667,8 +5698,11 @@ def resolve_nous_runtime_credentials(
             )
 
             # Persist routing and TLS metadata for non-interactive refresh.
+            # Persist the validated, network-provenance URL — NEVER the env
+            # override (which is a runtime-only overlay; persisting it would
+            # leak a dev/staging host into auth.json and survive unsetting it).
             state["portal_base_url"] = portal_base_url
-            state["inference_base_url"] = inference_base_url
+            state["inference_base_url"] = stored_inference_base_url
             state["client_id"] = client_id
             state["tls"] = {
                 "insecure": verify is False,
diff --git a/hermes_cli/proxy/adapters/nous_portal.py b/hermes_cli/proxy/adapters/nous_portal.py
index 4759d8dd22b..18c0123a25b 100644
--- a/hermes_cli/proxy/adapters/nous_portal.py
+++ b/hermes_cli/proxy/adapters/nous_portal.py
@@ -17,6 +17,7 @@ from hermes_cli.auth import (
     _load_auth_store,
     _auth_store_lock,
     _is_terminal_nous_refresh_error,
+    _nous_inference_env_override,
     _quarantine_nous_oauth_state,
     _quarantine_nous_pool_entries,
     _save_auth_store,
@@ -132,8 +133,17 @@ class NousPortalAdapter(UpstreamAdapter):
                     "Try `hermes auth add nous` to re-authenticate."
                 )
 
+            # base_url returned by resolve_nous_runtime_credentials() already
+            # honors the NOUS_INFERENCE_BASE_URL env override (the documented
+            # dev/staging escape hatch). Re-validating it here against the prod
+            # host allowlist would wrongly reject a legitimate staging override,
+            # so layer the same env-first overlay on top of the network-validated
+            # value: env override wins, else validate the returned URL, else
+            # fall back to the production default (defense-in-depth for a future
+            # source-layer bypass).
             base_url = (
-                _validate_nous_inference_url_from_network(refreshed.get("base_url"))
+                _nous_inference_env_override()
+                or _validate_nous_inference_url_from_network(refreshed.get("base_url"))
                 or DEFAULT_NOUS_INFERENCE_URL
             )
             base_url = base_url.rstrip("/")
diff --git a/tests/hermes_cli/test_auth_nous_provider.py b/tests/hermes_cli/test_auth_nous_provider.py
index 32b175a5b12..53812f4e718 100644
--- a/tests/hermes_cli/test_auth_nous_provider.py
+++ b/tests/hermes_cli/test_auth_nous_provider.py
@@ -238,8 +238,8 @@ def test_resolve_nous_runtime_credentials_invoke_jwt_is_idempotent(
         "active_provider": "nous",
         "providers": {
             "nous": {
-                "portal_base_url": "https://portal.example.com",
-                "inference_base_url": "https://inference.example.com/v1",
+                "portal_base_url": "https://portal.nousresearch.com",
+                "inference_base_url": "https://inference-api.nousresearch.com/v1",
                 "client_id": "hermes-cli",
                 "token_type": "Bearer",
                 "scope": auth_mod.DEFAULT_NOUS_SCOPE,
diff --git a/tests/hermes_cli/test_nous_inference_url_validation.py b/tests/hermes_cli/test_nous_inference_url_validation.py
index 193a342cff2..3aa3dc2d563 100644
--- a/tests/hermes_cli/test_nous_inference_url_validation.py
+++ b/tests/hermes_cli/test_nous_inference_url_validation.py
@@ -291,3 +291,164 @@ class TestHealsPoisonedStoredValue:
 
         result = auth.refresh_nous_oauth_from_state(state, force_refresh=True)
         assert result["inference_base_url"] == good
+
+
+class TestEnvOverrideWins:
+    """``NOUS_INFERENCE_BASE_URL`` must win over the stored value for the
+    URL used to build the inference client / returned to callers.
+
+    This is the documented dev/staging escape hatch. The breakage it
+    regresses against: the security allowlist (#30611) plus the refresh
+    heal (#49735) mean a staging login's stored ``inference_base_url`` is
+    rejected and rewritten to the production default, and the runtime
+    resolver previously read that stored (prod) value *before* the env
+    var — so an OAuth user could not reach staging at all, even with the
+    env override set. The override is consulted FIRST here, while the
+    PERSISTED value stays the validated, network-provenance one (the env
+    override is a runtime overlay, never written to auth.json).
+    """
+
+    STAGING = "https://stg-inference-api.nousresearch.com/v1"
+
+    def _patch_no_refresh(self, monkeypatch, auth, state):
+        import contextlib
+
+        # No refresh fires: the stored access token is a usable invoke JWT.
+        monkeypatch.setattr(auth, "_nous_invoke_jwt_status", lambda *a, **k: None)
+        monkeypatch.setattr(
+            auth, "_auth_store_lock", lambda *a, **k: contextlib.nullcontext()
+        )
+        monkeypatch.setattr(auth, "_load_auth_store", lambda *a, **k: {})
+        monkeypatch.setattr(auth, "_load_provider_state", lambda store, pid: state)
+        monkeypatch.setattr(auth, "_save_provider_state", lambda *a, **k: None)
+        monkeypatch.setattr(auth, "_save_auth_store", lambda *a, **k: None)
+        monkeypatch.setattr(auth, "_write_shared_nous_state", lambda *a, **k: None)
+        monkeypatch.setattr(auth, "_sync_nous_pool_from_auth_store", lambda *a, **k: None)
+        monkeypatch.setattr(auth, "_resolve_verify", lambda *a, **k: True)
+        monkeypatch.setattr(auth, "_assert_nous_inference_jwt_usable", lambda *a, **k: None)
+        monkeypatch.setattr(auth, "_select_nous_invoke_jwt", lambda *a, **k: None)
+
+    def _base_state(self, auth, stored):
+        return {
+            "access_token": "tok",
+            "refresh_token": "rtok",
+            "client_id": "hermes-cli",
+            "portal_base_url": auth.DEFAULT_NOUS_PORTAL_URL,
+            "inference_base_url": stored,
+            "agent_key": "ak-123",
+        }
+
+    def test_no_refresh_env_override_wins_over_prod_stored(self, monkeypatch):
+        """The exact regression: a prod-pinned stored value (the state a
+        staging login lands in after the heal) must NOT shadow the env
+        override on the steady-state read path."""
+        import hermes_cli.auth as auth
+
+        state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL)
+        self._patch_no_refresh(monkeypatch, auth, state)
+        monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", self.STAGING)
+
+        result = auth.resolve_nous_runtime_credentials()
+
+        assert result["base_url"] == self.STAGING, (
+            "env override must win over the stored production URL on the "
+            f"no-refresh read path, got {result['base_url']!r}"
+        )
+
+    def test_no_refresh_env_override_not_persisted(self, monkeypatch):
+        """The env override is a runtime overlay: it must never be written
+        back into the stored state (auth.json)."""
+        import hermes_cli.auth as auth
+
+        state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL)
+        self._patch_no_refresh(monkeypatch, auth, state)
+        monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", self.STAGING)
+
+        auth.resolve_nous_runtime_credentials()
+
+        assert state["inference_base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL, (
+            "env override leaked into persisted state — it must stay a "
+            f"runtime overlay, got {state['inference_base_url']!r}"
+        )
+
+    def test_no_refresh_no_env_uses_stored_default(self, monkeypatch):
+        """With no env override, the validated stored value is used."""
+        import hermes_cli.auth as auth
+
+        state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL)
+        self._patch_no_refresh(monkeypatch, auth, state)
+        monkeypatch.delenv("NOUS_INFERENCE_BASE_URL", raising=False)
+
+        result = auth.resolve_nous_runtime_credentials()
+        assert result["base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL
+
+    def test_no_refresh_heals_poisoned_stored_without_env(self, monkeypatch):
+        """A poisoned stored staging host (persisted before the allowlist)
+        still heals to the default when no env override is present — the
+        #50265 no-refresh-read-path heal, folded in here."""
+        import hermes_cli.auth as auth
+
+        state = self._base_state(auth, self.STAGING)
+        self._patch_no_refresh(monkeypatch, auth, state)
+        monkeypatch.delenv("NOUS_INFERENCE_BASE_URL", raising=False)
+
+        result = auth.resolve_nous_runtime_credentials()
+        assert result["base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL, (
+            "poisoned stored URL must heal to the production default on the "
+            f"no-refresh read path, got {result['base_url']!r}"
+        )
+
+    def test_refresh_env_override_wins_but_persists_validated(self, monkeypatch):
+        """On the refresh path: env override is used for the returned/client
+        URL, but the PERSISTED stored value is the validated network one
+        (production default when the Portal hands back a rejected host)."""
+        import hermes_cli.auth as auth
+
+        state = self._base_state(auth, auth.DEFAULT_NOUS_INFERENCE_URL)
+        self._patch_no_refresh(monkeypatch, auth, state)
+        # Force the refresh branch; Portal hands back a (rejected) staging host.
+        monkeypatch.setattr(auth, "_nous_invoke_jwt_status", lambda *a, **k: "needs_refresh")
+        monkeypatch.setattr(
+            auth,
+            "_refresh_access_token",
+            lambda **k: {
+                "access_token": "newtok",
+                "refresh_token": "newrtok",
+                "expires_in": 3600,
+                "inference_base_url": self.STAGING,
+            },
+        )
+        monkeypatch.setenv("NOUS_INFERENCE_BASE_URL", self.STAGING)
+
+        result = auth.resolve_nous_runtime_credentials(force_refresh=True)
+
+        assert result["base_url"] == self.STAGING, (
+            "env override must win for the returned URL on the refresh path"
+        )
+        assert state["inference_base_url"] == auth.DEFAULT_NOUS_INFERENCE_URL, (
+            "refresh path must persist the validated network value (prod "
+            f"default), not the env override, got {state['inference_base_url']!r}"
+        )
+
+
+class TestProxyAdapterEnvOverride:
+    """The Nous proxy adapter is the second chokepoint: it re-validates the
+    base_url returned by resolve_nous_runtime_credentials() against the prod
+    allowlist. That re-validation must not clobber a legitimate
+    NOUS_INFERENCE_BASE_URL staging override.
+    """
+
+    def test_proxy_adapter_consults_env_override(self):
+        """Grep contract: the proxy adapter's forward-boundary base_url
+        resolution consults the env override before the network validator,
+        so a staging override survives the defense-in-depth re-validation."""
+        from pathlib import Path
+        import hermes_cli.proxy.adapters.nous_portal as _nous_adapter
+
+        source = Path(_nous_adapter.__file__).read_text(encoding="utf-8")
+        assert "_nous_inference_env_override()" in source, (
+            "proxy adapter must layer the env override on top of the network "
+            "validator, else a staging override is rejected at the forward boundary"
+        )
+        # The validator must still be present (defense-in-depth preserved).
+        assert "_validate_nous_inference_url_from_network" in source