fix(auth): mirror Nous OAuth credentials to providers.nous on CLI login

`hermes auth add nous --type oauth` only wrote credential_pool.nous, leaving providers.nous empty. When the Nous agent_key's 24h TTL expired, run_agent.py's 401-recovery path called resolve_nous_runtime_credentials (which reads providers.nous), got AuthError "Hermes is not logged into Nous Portal", caught it as logger.debug (suppressed at INFO level), and the agent died with "Non-retryable client error" — no signal to the user that recovery even tried. Introduce persist_nous_credentials() as the single source of truth for Nous device-code login persistence. Both auth_commands (CLI) and web_server (dashboard) now route through it, so pool and providers stay in sync at write time. Why: CLI-provisioned profiles couldn't recover from agent_key expiry, producing silent daily outages 24h after first login. PR #6856/#6869 addressed adjacent issues but assumed providers.nous was populated; this one wasn't being written. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-27 01:11:40 +00:00 · 2026-04-17 23:44:10 +00:00 · 2026-04-17 23:44:10 +00:00 · c096a6935f
commit c096a6935f
parent a155b4a159
5 changed files with 221 additions and 40 deletions
--- a/tests/hermes_cli/test_auth_nous_provider.py
+++ b/tests/hermes_cli/test_auth_nous_provider.py
@ -456,3 +456,152 @@ class TestLoginNousSkipKeepsCurrent:
        assert "nous" in auth_after.get("providers", {})


+# =============================================================================
+# persist_nous_credentials: shared helper for CLI + web dashboard login paths
+# =============================================================================
+
+
+def _full_state_fixture() -> dict:
+    """Shape of the dict returned by _nous_device_code_login /
+    refresh_nous_oauth_from_state. Used as helper input."""
+    return {
+        "portal_base_url": "https://portal.example.com",
+        "inference_base_url": "https://inference.example.com/v1",
+        "client_id": "hermes-cli",
+        "scope": "inference:mint_agent_key",
+        "token_type": "Bearer",
+        "access_token": "access-tok",
+        "refresh_token": "refresh-tok",
+        "obtained_at": "2026-04-17T22:00:00+00:00",
+        "expires_at": "2026-04-17T22:15:00+00:00",
+        "expires_in": 900,
+        "agent_key": "agent-key-value",
+        "agent_key_id": "ak-id",
+        "agent_key_expires_at": "2026-04-18T22:00:00+00:00",
+        "agent_key_expires_in": 86400,
+        "agent_key_reused": False,
+        "agent_key_obtained_at": "2026-04-17T22:00:10+00:00",
+        "tls": {"insecure": False, "ca_bundle": None},
+    }
+
+
+def test_persist_nous_credentials_writes_both_pool_and_providers(tmp_path, monkeypatch):
+    """Helper must populate BOTH credential_pool.nous AND providers.nous.
+
+    Regression guard: before this helper existed, `hermes auth add nous`
+    wrote only the pool. After the Nous agent_key's 24h TTL expired, the
+    401-recovery path in run_agent.py called resolve_nous_runtime_credentials
+    which reads providers.nous, found it empty, raised AuthError, and the
+    agent failed with "Non-retryable client error". Both stores must stay
+    in sync at write time.
+    """
+    from hermes_cli.auth import persist_nous_credentials
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1, "providers": {},
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    creds = _full_state_fixture()
+    entry = persist_nous_credentials(
+        creds, label="test-label", source="manual:device_code",
+    )
+
+    assert entry is not None
+    assert entry.provider == "nous"
+
+    payload = json.loads((hermes_home / "auth.json").read_text())
+
+    # providers.nous populated with the full state (new behavior)
+    singleton = payload["providers"]["nous"]
+    assert singleton["access_token"] == "access-tok"
+    assert singleton["refresh_token"] == "refresh-tok"
+    assert singleton["agent_key"] == "agent-key-value"
+    assert singleton["agent_key_expires_at"] == "2026-04-18T22:00:00+00:00"
+
+    # credential_pool.nous populated with the pool entry
+    pool_entries = payload["credential_pool"]["nous"]
+    pool_entry = next(
+        item for item in pool_entries if item["source"] == "manual:device_code"
+    )
+    assert pool_entry["label"] == "test-label"
+    assert pool_entry["agent_key"] == "agent-key-value"
+    assert pool_entry["base_url"] == "https://inference.example.com/v1"
+
+
+def test_persist_nous_credentials_allows_recovery_from_401(tmp_path, monkeypatch):
+    """End-to-end: after persisting via the helper, resolve_nous_runtime_credentials
+    must succeed (not raise "Hermes is not logged into Nous Portal").
+
+    This is the exact path that ran_agent.py's `_try_refresh_nous_client_credentials`
+    calls after a Nous 401 — before the fix it would raise AuthError because
+    providers.nous was empty.
+    """
+    from hermes_cli.auth import persist_nous_credentials, resolve_nous_runtime_credentials
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1, "providers": {},
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    persist_nous_credentials(
+        _full_state_fixture(),
+        label="recovery-test",
+        source="manual:device_code",
+    )
+
+    # Stub the network-touching steps so we don't actually contact the
+    # portal — the point of this test is that state lookup succeeds and
+    # doesn't raise "Hermes is not logged into Nous Portal".
+    def _fake_refresh_access_token(*, client, portal_base_url, client_id, refresh_token):
+        return {
+            "access_token": "access-new",
+            "refresh_token": "refresh-new",
+            "expires_in": 900,
+            "token_type": "Bearer",
+        }
+
+    def _fake_mint_agent_key(*, client, portal_base_url, access_token, min_ttl_seconds):
+        return _mint_payload(api_key="new-agent-key")
+
+    monkeypatch.setattr("hermes_cli.auth._refresh_access_token", _fake_refresh_access_token)
+    monkeypatch.setattr("hermes_cli.auth._mint_agent_key", _fake_mint_agent_key)
+
+    creds = resolve_nous_runtime_credentials(min_key_ttl_seconds=300, force_mint=True)
+    assert creds["api_key"] == "new-agent-key"
+
+
+def test_persist_nous_credentials_preserves_existing_providers_entry(tmp_path, monkeypatch):
+    """Calling persist twice must upsert providers.nous (not duplicate or crash)."""
+    from hermes_cli.auth import persist_nous_credentials
+
+    hermes_home = tmp_path / "hermes"
+    hermes_home.mkdir(parents=True, exist_ok=True)
+    (hermes_home / "auth.json").write_text(json.dumps({
+        "version": 1, "providers": {},
+    }))
+    monkeypatch.setenv("HERMES_HOME", str(hermes_home))
+
+    first = _full_state_fixture()
+    persist_nous_credentials(first, label="first", source="manual:device_code")
+
+    second = _full_state_fixture()
+    second["access_token"] = "access-second"
+    second["agent_key"] = "agent-key-second"
+    persist_nous_credentials(second, label="second", source="manual:device_code")
+
+    payload = json.loads((hermes_home / "auth.json").read_text())
+
+    # providers.nous reflects the latest write (singleton semantics)
+    assert payload["providers"]["nous"]["access_token"] == "access-second"
+    assert payload["providers"]["nous"]["agent_key"] == "agent-key-second"
+
+    # credential_pool.nous has both entries (pool = multi-credential)
+    pool_entries = payload["credential_pool"]["nous"]
+    labels = [e.get("label") for e in pool_entries]
+    assert "first" in labels
+    assert "second" in labels