diff --git a/cli-config.yaml.example b/cli-config.yaml.example index c119d0ac4b9..6f3c0a61d1f 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -1097,3 +1097,27 @@ display: # - command: "~/.hermes/agent-hooks/log-orchestration.sh" # # hooks_auto_accept: false + + +# ============================================================================= +# Web Dashboard +# ============================================================================= +# OAuth gate configuration for `hermes dashboard --host `. +# The bundled Nous Portal plugin reads these on startup; settings here are +# the canonical surface. Each can be overridden by an environment variable: +# +# dashboard.oauth.client_id <- HERMES_DASHBOARD_OAUTH_CLIENT_ID +# dashboard.oauth.portal_url <- HERMES_DASHBOARD_PORTAL_URL +# +# Env wins when set to a non-empty value. This is what Fly.io's platform- +# secret injection uses to push per-deploy client_ids without needing to +# bake a config.yaml into the image. Empty env values are treated as unset +# so a provisioned-but-not-populated secret can't shadow a valid entry here. +# +# Local dev / on-prem deploys should typically set these via config.yaml +# (the ~/.hermes/.env file is reserved for API keys and secrets). +# +# dashboard: +# oauth: +# client_id: "" # agent:{instance_id}; Portal provisions this at deploy +# portal_url: "" # blank → default https://portal.nousresearch.com diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 54d3d960fb7..58f355bd43c 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1180,6 +1180,23 @@ DEFAULT_CONFIG = { # Set this to True to re-enable the surfaces with the understanding # that the numbers are a local lower-bound estimate, not billing. "show_token_analytics": False, + # OAuth gate configuration (engaged when ``--host`` is set and + # ``--insecure`` is not). The bundled Nous Portal plugin reads + # both keys at startup; they are the canonical surface for these + # settings. Each can be overridden by an environment variable — + # ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` and + # ``HERMES_DASHBOARD_PORTAL_URL`` respectively — and the env var + # wins when set to a non-empty value. The override path is what + # Fly.io's platform-secret injection uses to push the per-deploy + # client_id at provisioning time without operators needing to + # touch config.yaml. Local dev / non-Fly deploys can set either + # surface; missing values fall through to the plugin's defaults + # (no provider registered when ``client_id`` is empty; + # ``portal_url`` defaults to https://portal.nousresearch.com). + "oauth": { + "client_id": "", # agent:{instance_id} — Portal provisions this + "portal_url": "", # blank → use plugin default (production Portal) + }, }, # Privacy settings diff --git a/plugins/dashboard_auth/nous/__init__.py b/plugins/dashboard_auth/nous/__init__.py index e434fbfb054..c9d4b744cf0 100644 --- a/plugins/dashboard_auth/nous/__init__.py +++ b/plugins/dashboard_auth/nous/__init__.py @@ -2,20 +2,31 @@ Implements ``nous-account-service/docs/agent-dashboard-oauth-contract.md`` (PR #180). The plugin auto-loads (bundled, kind=backend) but only registers -its provider when the Portal-injected env var is present, so loopback / -``--insecure`` operators are unaffected. +its provider when a client_id is configured — either via ``config.yaml`` or +via the Portal-injected env var — so loopback / ``--insecure`` operators +are unaffected. -Required env var (Portal injects at Fly.io provisioning): +Configuration surfaces (env wins over config.yaml when set non-empty): - HERMES_DASHBOARD_OAUTH_CLIENT_ID — shape ``agent:{agent_instance_id}`` + ``config.yaml`` — canonical surface:: -Optional env var: + dashboard: + oauth: + client_id: agent:{agent_instance_id} # required + portal_url: https://portal.example # optional - HERMES_DASHBOARD_PORTAL_URL — defaults to - ``https://portal.nousresearch.com`` - (production Portal). Override only - for staging (``portal.rewbs.uk``) - or a custom deployment. + Environment overrides — used by Fly.io's platform-secret injection so + per-deploy values don't need to bake into ``config.yaml``: + + HERMES_DASHBOARD_OAUTH_CLIENT_ID — shape ``agent:{agent_instance_id}`` + HERMES_DASHBOARD_PORTAL_URL — defaults to + ``https://portal.nousresearch.com`` + (production Portal). Override only + for staging (``portal.rewbs.uk``) + or a custom deployment. + +Empty env var values are treated as unset so a provisioned-but-not-populated +Fly secret can't shadow a valid config.yaml entry. Key contract points encoded here: @@ -442,40 +453,104 @@ class NousDashboardAuthProvider(DashboardAuthProvider): # --------------------------------------------------------------------------- +def _load_config_oauth_section() -> dict: + """Return the ``dashboard.oauth`` block from ``config.yaml`` if it + exists and is a dict; otherwise an empty dict. + + Robust to (a) load_config() raising (malformed YAML, IO error, + config.yaml absent — common in fresh installs), (b) the + ``dashboard`` key being absent or non-dict, and (c) the ``oauth`` + sub-key being present but not a dict (user typo). Each shape falls + through to ``{}`` so register() can rely on `.get(...)` access. + """ + try: + from hermes_cli.config import cfg_get, load_config + + cfg = load_config() + except Exception as exc: # noqa: BLE001 — broad catch is intentional + logger.debug( + "dashboard-auth-nous: load_config() raised %s; " + "falling back to env-only configuration", + exc, + ) + return {} + section = cfg_get(cfg, "dashboard", "oauth", default=None) + return section if isinstance(section, dict) else {} + + +def _resolve_client_id() -> str: + """Resolve the OAuth client_id with env-overrides-config precedence. + + Order: + 1. ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` env var (when non-empty + after strip — empty values are treated as unset so a + provisioned-but-not-populated Fly secret can't shadow a valid + config.yaml entry). + 2. ``dashboard.oauth.client_id`` in ``config.yaml``. + 3. Empty string — signals "no client_id configured" to the caller. + """ + env = os.environ.get("HERMES_DASHBOARD_OAUTH_CLIENT_ID", "").strip() + if env: + return env + cfg_value = _load_config_oauth_section().get("client_id", "") + return str(cfg_value).strip() + + +def _resolve_portal_url() -> str: + """Resolve the Portal URL with env-overrides-config precedence. + + Order: + 1. ``HERMES_DASHBOARD_PORTAL_URL`` env var (non-empty after strip). + 2. ``dashboard.oauth.portal_url`` in ``config.yaml``. + 3. :data:`_DEFAULT_PORTAL_URL` (production Portal). + """ + env = os.environ.get("HERMES_DASHBOARD_PORTAL_URL", "").strip() + if env: + return env + cfg_value = str( + _load_config_oauth_section().get("portal_url", "") + ).strip() + return cfg_value or _DEFAULT_PORTAL_URL + + def register(ctx) -> None: """Plugin entry — called by the plugin loader at startup. - Registers ``NousDashboardAuthProvider`` only when - ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` is set (the Portal injects this - at Fly.io provisioning). ``HERMES_DASHBOARD_PORTAL_URL`` defaults to - production; override only for staging or custom deployments. + Registers ``NousDashboardAuthProvider`` only when a client_id is + configured (either via ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` env var + or via ``dashboard.oauth.client_id`` in ``config.yaml``). The env + var wins when set non-empty — Fly.io's platform-secret injection + pushes the per-deploy value through this path. When skipping, writes a short human-readable reason to the module- level :data:`LAST_SKIP_REASON` so the dashboard's fail-closed branch can surface "Set HERMES_DASHBOARD_OAUTH_CLIENT_ID …" instead of the - bare "no providers registered" the gate would otherwise emit. + bare "no providers registered" the gate would otherwise emit. The + reason mentions BOTH configuration surfaces so operators don't + guess wrong about which one to populate. - Operator-owned dashboards (loopback / ``--insecure``) leave the env - var unset, so this plugin is a no-op for them. The gate-engagement - layer (``hermes_cli.web_server.should_require_auth`` + the fail- - closed check in ``start_server``) handles the "public bind with zero - providers" case independently. + Operator-owned dashboards (loopback / ``--insecure``) leave both + surfaces unset, so this plugin is a no-op for them. The gate- + engagement layer (``hermes_cli.web_server.should_require_auth`` + + the fail-closed check in ``start_server``) handles the "public bind + with zero providers" case independently. """ global LAST_SKIP_REASON LAST_SKIP_REASON = "" - client_id = os.environ.get("HERMES_DASHBOARD_OAUTH_CLIENT_ID", "").strip() - portal_url = ( - os.environ.get("HERMES_DASHBOARD_PORTAL_URL", "").strip() - or _DEFAULT_PORTAL_URL - ) + client_id = _resolve_client_id() + portal_url = _resolve_portal_url() if not client_id: LAST_SKIP_REASON = ( - "HERMES_DASHBOARD_OAUTH_CLIENT_ID is not set. The Nous Portal " - "provisions this env var (shape 'agent:{instance_id}') when it " - "deploys a Hermes Agent instance — set it to your provisioned " - "client id, or pass --insecure to skip the OAuth gate entirely." + "HERMES_DASHBOARD_OAUTH_CLIENT_ID is not set (and " + "dashboard.oauth.client_id in config.yaml is empty). The " + "Nous Portal provisions this env var (shape " + "'agent:{instance_id}') when it deploys a Hermes Agent " + "instance — set it to your provisioned client id (either " + "as an env var or under dashboard.oauth.client_id in " + "config.yaml), or pass --insecure to skip the OAuth gate " + "entirely." ) logger.debug("dashboard-auth-nous: %s", LAST_SKIP_REASON) return diff --git a/plugins/dashboard_auth/nous/plugin.yaml b/plugins/dashboard_auth/nous/plugin.yaml index 3fd8858a00e..c395c0c9165 100644 --- a/plugins/dashboard_auth/nous/plugin.yaml +++ b/plugins/dashboard_auth/nous/plugin.yaml @@ -1,6 +1,6 @@ name: nous version: 1.0.0 -description: "Dashboard auth provider — OAuth 2.0 (authorization-code + PKCE) against Nous Portal. Auto-activates when HERMES_DASHBOARD_OAUTH_CLIENT_ID is set (Portal injects this at Fly.io provisioning). HERMES_DASHBOARD_PORTAL_URL is optional and defaults to https://portal.nousresearch.com." +description: "Dashboard auth provider — OAuth 2.0 (authorization-code + PKCE) against Nous Portal. Auto-activates when a client_id is configured via either dashboard.oauth.client_id in config.yaml (canonical surface) or HERMES_DASHBOARD_OAUTH_CLIENT_ID env var (operator override; Portal injects this at Fly.io provisioning). dashboard.oauth.portal_url / HERMES_DASHBOARD_PORTAL_URL are optional and default to https://portal.nousresearch.com." author: NousResearch kind: backend requires_env: diff --git a/tests/plugins/dashboard_auth/test_nous_provider.py b/tests/plugins/dashboard_auth/test_nous_provider.py index 92806f15fb8..f6fc6fca42c 100644 --- a/tests/plugins/dashboard_auth/test_nous_provider.py +++ b/tests/plugins/dashboard_auth/test_nous_provider.py @@ -234,6 +234,170 @@ class TestPluginRegister: assert registered._portal_url == "https://portal.nousresearch.com" +# --------------------------------------------------------------------------- +# Plugin entry point: config.yaml + env-override precedence +# --------------------------------------------------------------------------- + + +class TestConfigYamlSource: + """``dashboard.oauth.{client_id,portal_url}`` in ``config.yaml`` is the + canonical surface for these settings. ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` + and ``HERMES_DASHBOARD_PORTAL_URL`` are operator overrides that win when + set — this is the contract Fly.io's platform-secret injection relies on, + and the contract that lets local devs experiment without setting env + vars. + + Each test pins exactly one tier of the precedence chain so a regression + that flips the order is caught: + + env (when truthy) > config.yaml (when truthy) > plugin default + """ + + @pytest.fixture + def patch_config(self, monkeypatch): + """Yield a callable that replaces ``hermes_cli.config.load_config`` + with a stub returning the given dict. Tests pass the intended + ``dashboard.oauth`` block; the stub returns the wrapping structure.""" + + def _set(oauth_block: Dict[str, Any] | None) -> None: + cfg = {} + if oauth_block is not None: + cfg = {"dashboard": {"oauth": oauth_block}} + monkeypatch.setattr( + "hermes_cli.config.load_config", lambda: cfg + ) + + return _set + + def test_config_yaml_only_client_id_registers(self, patch_config, monkeypatch): + """No env var, only config.yaml — plugin reads from config and + registers successfully. This is the path Teknium's review pushed + for (".env is for secrets only").""" + monkeypatch.delenv("HERMES_DASHBOARD_OAUTH_CLIENT_ID", raising=False) + monkeypatch.delenv("HERMES_DASHBOARD_PORTAL_URL", raising=False) + patch_config({"client_id": "agent:from-config"}) + ctx = MagicMock() + nous_plugin.register(ctx) + ctx.register_dashboard_auth_provider.assert_called_once() + registered = ctx.register_dashboard_auth_provider.call_args.args[0] + assert registered._client_id == "agent:from-config" + # Defaults to production portal URL when neither config nor env + # specifies one. + assert registered._portal_url == "https://portal.nousresearch.com" + + def test_config_yaml_client_id_and_portal_url(self, patch_config, monkeypatch): + monkeypatch.delenv("HERMES_DASHBOARD_OAUTH_CLIENT_ID", raising=False) + monkeypatch.delenv("HERMES_DASHBOARD_PORTAL_URL", raising=False) + patch_config({ + "client_id": "agent:from-config", + "portal_url": "https://staging.portal.example", + }) + ctx = MagicMock() + nous_plugin.register(ctx) + registered = ctx.register_dashboard_auth_provider.call_args.args[0] + assert registered._client_id == "agent:from-config" + assert registered._portal_url == "https://staging.portal.example" + + def test_env_overrides_config_client_id(self, patch_config, monkeypatch): + """Env wins. Critical for Fly.io: the Portal injects + HERMES_DASHBOARD_OAUTH_CLIENT_ID at deploy time and we MUST + honour it even if a stale config.yaml ships in the image.""" + monkeypatch.setenv("HERMES_DASHBOARD_OAUTH_CLIENT_ID", "agent:from-env") + patch_config({"client_id": "agent:from-config"}) + ctx = MagicMock() + nous_plugin.register(ctx) + registered = ctx.register_dashboard_auth_provider.call_args.args[0] + assert registered._client_id == "agent:from-env", ( + "env var must override config.yaml — Fly secret injection " + "depends on this precedence" + ) + + def test_env_overrides_config_portal_url(self, patch_config, monkeypatch): + monkeypatch.setenv("HERMES_DASHBOARD_OAUTH_CLIENT_ID", "agent:x") + monkeypatch.setenv( + "HERMES_DASHBOARD_PORTAL_URL", "https://env.portal.example", + ) + patch_config({ + "client_id": "agent:x", + "portal_url": "https://config.portal.example", + }) + ctx = MagicMock() + nous_plugin.register(ctx) + registered = ctx.register_dashboard_auth_provider.call_args.args[0] + assert registered._portal_url == "https://env.portal.example" + + def test_empty_env_string_does_not_shadow_config( + self, patch_config, monkeypatch + ): + """``HERMES_DASHBOARD_OAUTH_CLIENT_ID=`` (set but empty) is + common in CI/Fly when a secret is provisioned-but-not-populated. + It MUST NOT shadow a valid config.yaml value with an empty + string — operators would lose the gate.""" + monkeypatch.setenv("HERMES_DASHBOARD_OAUTH_CLIENT_ID", "") + patch_config({"client_id": "agent:from-config"}) + ctx = MagicMock() + nous_plugin.register(ctx) + ctx.register_dashboard_auth_provider.assert_called_once() + registered = ctx.register_dashboard_auth_provider.call_args.args[0] + assert registered._client_id == "agent:from-config" + + def test_neither_source_skips_with_helpful_reason( + self, patch_config, monkeypatch + ): + """Neither env nor config.yaml set — skip with a reason that + mentions BOTH surfaces so operators don't guess wrong about + which one to populate.""" + monkeypatch.delenv("HERMES_DASHBOARD_OAUTH_CLIENT_ID", raising=False) + patch_config(None) + ctx = MagicMock() + nous_plugin.register(ctx) + ctx.register_dashboard_auth_provider.assert_not_called() + # Old behaviour: skip reason mentions the env var. + assert "HERMES_DASHBOARD_OAUTH_CLIENT_ID" in nous_plugin.LAST_SKIP_REASON + # New behaviour: skip reason ALSO mentions the config.yaml path + # so the user knows it's a valid alternative. + assert "dashboard.oauth.client_id" in nous_plugin.LAST_SKIP_REASON, ( + f"skip reason omits the config.yaml surface — operators " + f"won't know it exists. got: {nous_plugin.LAST_SKIP_REASON!r}" + ) + + def test_config_yaml_load_failure_falls_through_cleanly( + self, monkeypatch + ): + """If load_config() raises (e.g. malformed YAML, IOError), the + plugin must not crash — it falls through to the env-only path + and either succeeds (if env is set) or surfaces the standard + 'not set' skip reason.""" + monkeypatch.delenv("HERMES_DASHBOARD_OAUTH_CLIENT_ID", raising=False) + + def _broken_load(): + raise OSError("config.yaml not readable") + + monkeypatch.setattr( + "hermes_cli.config.load_config", _broken_load + ) + ctx = MagicMock() + # Must not raise. + nous_plugin.register(ctx) + ctx.register_dashboard_auth_provider.assert_not_called() + + def test_config_yaml_with_non_dict_oauth_section( + self, monkeypatch + ): + """cfg_get handles 'config has a string where a section was + expected' robustly. Verify the plugin inherits that resilience + so a malformed user config doesn't crash startup.""" + monkeypatch.delenv("HERMES_DASHBOARD_OAUTH_CLIENT_ID", raising=False) + monkeypatch.setattr( + "hermes_cli.config.load_config", + lambda: {"dashboard": {"oauth": "wrong type"}}, + ) + ctx = MagicMock() + nous_plugin.register(ctx) + # Falls through to the no-env-and-no-config path. + ctx.register_dashboard_auth_provider.assert_not_called() + + # --------------------------------------------------------------------------- # start_login # --------------------------------------------------------------------------- diff --git a/website/docs/user-guide/features/web-dashboard.md b/website/docs/user-guide/features/web-dashboard.md index 635ded7d80f..44a1cfc897a 100644 --- a/website/docs/user-guide/features/web-dashboard.md +++ b/website/docs/user-guide/features/web-dashboard.md @@ -323,26 +323,45 @@ If the gate would engage but **no** `DashboardAuthProvider` is registered (no No ### Default provider: Nous Research -The bundled `plugins/dashboard_auth/nous` plugin is **always installed** and auto-loaded. It auto-registers a `DashboardAuthProvider` named `nous` when the per-instance client ID is set: +The bundled `plugins/dashboard_auth/nous` plugin is **always installed** and auto-loaded. It auto-registers a `DashboardAuthProvider` named `nous` when a client ID is configured. -| Env var | Required? | Format | Provisioned by | +#### Configuration + +The plugin reads from two surfaces, with the environment variable winning when set non-empty: + +**`config.yaml`** — the canonical surface: + +```yaml +dashboard: + oauth: + client_id: agent:01HXYZ… # required to engage the gate + portal_url: https://portal.nousresearch.com # optional; defaults to production +``` + +**Environment variables** — operator overrides: + +| Env var | Overrides | Format | Provisioned by | |---------|-----------|--------|----------------| -| `HERMES_DASHBOARD_OAUTH_CLIENT_ID` | **yes** | `agent:{instance_id}` | Nous Portal at Fly.io provisioning time | -| `HERMES_DASHBOARD_PORTAL_URL` | no | `https://portal.nousresearch.com` (default) | Portal — override only for staging or a custom deployment | +| `HERMES_DASHBOARD_OAUTH_CLIENT_ID` | `dashboard.oauth.client_id` | `agent:{instance_id}` | Nous Portal at Fly.io provisioning time | +| `HERMES_DASHBOARD_PORTAL_URL` | `dashboard.oauth.portal_url` | URL (default: `https://portal.nousresearch.com`) | Portal — override only for staging or a custom deployment | -`HERMES_DASHBOARD_OAUTH_CLIENT_ID` is the only required variable; it's injected automatically when you deploy through the Nous Portal. The portal URL defaults to production, so the typical operator never touches it — set it explicitly only if you're pointing at staging (`portal.rewbs.uk`) or a custom Portal deployment. +Per the Hermes Agent convention (`~/.hermes/.env` is for API keys / secrets only), **`config.yaml` is the recommended place to set these values** for local dev, on-prem, and any deployment you control directly. The environment-variable path exists so Fly.io's platform-secret injection can push per-deploy `client_id`s without anyone having to edit `config.yaml` inside the image — that's its primary purpose. -If `HERMES_DASHBOARD_OAUTH_CLIENT_ID` is absent or malformed, the plugin reports the specific reason and the dashboard's fail-closed bind error tells you exactly what to fix: +Empty environment values are treated as unset, so a provisioned-but-not-populated Fly secret can't accidentally shadow a valid `config.yaml` entry. + +If neither source provides a client_id, the plugin reports the specific reason and the dashboard's fail-closed bind error tells you exactly what to fix: ``` Refusing to bind dashboard to 0.0.0.0 — the OAuth auth gate engages on non-loopback binds, but no auth providers are registered. Bundled providers reported these issues: - • nous: HERMES_DASHBOARD_OAUTH_CLIENT_ID is not set. The Nous Portal + • nous: HERMES_DASHBOARD_OAUTH_CLIENT_ID is not set (and + dashboard.oauth.client_id in config.yaml is empty). The Nous Portal provisions this env var (shape 'agent:{instance_id}') when it deploys a Hermes Agent instance — set it to your provisioned - client id, or pass --insecure to skip the OAuth gate entirely. + client id (either as an env var or under dashboard.oauth.client_id + in config.yaml), or pass --insecure to skip the OAuth gate entirely. Or pass --insecure to skip the auth gate (NOT recommended on untrusted networks). @@ -406,11 +425,20 @@ The login page lists all registered providers; multiple providers can be stacked ### Verifying the gate is on ```bash -# Run the dashboard with the gate engaged (Fly.io shape). -# HERMES_DASHBOARD_PORTAL_URL is optional — defaults to production. +# Quick env-var path (Fly.io shape). HERMES_DASHBOARD_PORTAL_URL is +# optional — defaults to production. HERMES_DASHBOARD_OAUTH_CLIENT_ID=agent:test \ hermes dashboard --host 0.0.0.0 +# Or the equivalent via config.yaml (recommended for local dev / on-prem): +# +# dashboard: +# oauth: +# client_id: agent:test +# +# then just: +hermes dashboard --host 0.0.0.0 + # Hit /api/status to see the gate state: curl -s http://127.0.0.1:9119/api/status | jq '.auth_required, .auth_providers' # true