diff --git a/docker/s6-rc.d/dashboard/run b/docker/s6-rc.d/dashboard/run
index 31c75ad4189..d6cfa3f0940 100755
--- a/docker/s6-rc.d/dashboard/run
+++ b/docker/s6-rc.d/dashboard/run
@@ -30,13 +30,21 @@ cd /opt/data
 dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}"
 dash_port="${HERMES_DASHBOARD_PORT:-9119}"
 
-# Binding to anything other than localhost requires --insecure — the
-# dashboard refuses otherwise because it exposes API keys. Inside a
-# container this is the expected deployment.
+# `--insecure` is opt-in via HERMES_DASHBOARD_INSECURE. The dashboard's
+# OAuth auth gate engages automatically on non-loopback binds when a
+# DashboardAuthProvider is registered (e.g. the bundled dashboard_auth/nous
+# provider, which auto-registers when HERMES_DASHBOARD_OAUTH_CLIENT_ID is
+# set). If no provider is registered, start_server fails closed with a
+# specific operator-facing error.
+#
+# This used to derive --insecure from the bind host ("anything non-loopback
+# implies insecure"), but that predates the OAuth gate and silently
+# disabled it on every container-deployed dashboard. The gate is now the
+# authority; operators on trusted LANs / behind a reverse proxy without
+# the OAuth contract opt in explicitly.
 insecure=""
-case "$dash_host" in
-    127.0.0.1|localhost) ;;
-    *) insecure="--insecure" ;;
+case "${HERMES_DASHBOARD_INSECURE:-}" in
+    1|true|TRUE|True|yes|YES|Yes) insecure="--insecure" ;;
 esac
 
 # shellcheck disable=SC2086 # word-splitting of $insecure is intentional
diff --git a/tests/docker/test_dashboard.py b/tests/docker/test_dashboard.py
index 56d4fa41c8a..db27a1c4d29 100644
--- a/tests/docker/test_dashboard.py
+++ b/tests/docker/test_dashboard.py
@@ -12,6 +12,7 @@ the realistic runtime context. See the conftest module docstring.
 """
 from __future__ import annotations
 
+import json
 import subprocess
 import time
 
@@ -201,3 +202,106 @@ def test_dashboard_restarts_after_crash(
     raise AssertionError(
         f"Dashboard not restarted after kill (first_pid={first_pid})"
     )
+
+
+# ---------------------------------------------------------------------------
+# OAuth auth-gate behaviour — regression guard for the dashboard-insecure
+# auto-injection bug. Pre-fix, the s6 run script appended `--insecure`
+# whenever `HERMES_DASHBOARD_HOST` was non-loopback, silently disabling
+# the OAuth gate on every container-deployed dashboard. The matching
+# static-text guard lives in tests/test_docker_home_override_scripts.py;
+# this is the behavioural end-to-end check.
+# ---------------------------------------------------------------------------
+
+
+def _fetch_api_status(container: str, *, deadline_s: float = 60.0) -> dict:
+    """Poll ``/api/status`` from inside the container via the venv python.
+
+    The dashboard binds to ``HERMES_DASHBOARD_HOST`` (typically ``0.0.0.0``)
+    so loopback inside the container works. The image doesn't ship
+    ``curl`` but Python's stdlib ``urllib`` is good enough.
+
+    Returns the decoded JSON dict on success; raises AssertionError on
+    timeout.
+    """
+    probe = (
+        "/opt/hermes/.venv/bin/python -c "
+        "'import json,urllib.request as u;"
+        "print(u.urlopen(\"http://127.0.0.1:9119/api/status\",timeout=5)"
+        ".read().decode())'"
+    )
+    end = time.monotonic() + deadline_s
+    last_err = ""
+    while time.monotonic() < end:
+        r = docker_exec_sh(container, probe, timeout=10)
+        if r.returncode == 0 and r.stdout.strip():
+            try:
+                return json.loads(r.stdout)
+            except ValueError as exc:  # JSONDecodeError subclasses ValueError
+                last_err = f"json parse: {exc!r} / stdout={r.stdout!r}"
+        else:
+            last_err = f"rc={r.returncode} stderr={r.stderr!r}"
+        time.sleep(0.5)
+    raise AssertionError(
+        f"/api/status never returned valid JSON within {deadline_s}s; "
+        f"last error: {last_err}"
+    )
+
+
+def test_dashboard_oauth_gate_engages_on_non_loopback_bind(
+    built_image: str, container_name: str,
+) -> None:
+    """The s6 dashboard run script must NOT auto-add ``--insecure`` when the
+    dashboard binds to ``0.0.0.0``. The OAuth auth gate engages on its own
+    when a ``DashboardAuthProvider`` is registered (the bundled nous
+    provider activates whenever ``HERMES_DASHBOARD_OAUTH_CLIENT_ID`` is
+    set).
+
+    Regression guard for the wildcard-subdomain rollout where every
+    portal-provisioned agent binds ``0.0.0.0`` and relies on the OAuth
+    gate to authenticate browser callers. Before this fix, the run script
+    flipped ``--insecure`` on for any non-loopback bind, which routed
+    ``start_server`` straight back into the legacy ``allow_public=True``
+    branch and disabled the gate every time.
+    """
+    subprocess.run(
+        ["docker", "run", "-d", "--name", container_name,
+         "-e", "HERMES_DASHBOARD=1",
+         "-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
+         "-e", "HERMES_DASHBOARD_OAUTH_CLIENT_ID=agent:test-instance",
+         built_image, "sleep", "120"],
+        check=True, capture_output=True, timeout=30,
+    )
+    status = _fetch_api_status(container_name)
+    assert status.get("auth_required") is True, (
+        "OAuth gate must be engaged on 0.0.0.0 bind when a provider is "
+        "registered and HERMES_DASHBOARD_INSECURE is unset. Got: "
+        f"{status!r}"
+    )
+    assert "nous" in status.get("auth_providers", []), (
+        "Bundled dashboard_auth/nous provider should register when "
+        f"HERMES_DASHBOARD_OAUTH_CLIENT_ID is set. Got: {status!r}"
+    )
+
+
+def test_dashboard_insecure_env_var_opts_out_of_gate(
+    built_image: str, container_name: str,
+) -> None:
+    """``HERMES_DASHBOARD_INSECURE=1`` re-enables the legacy no-gate mode
+    for operators running on trusted LANs behind a reverse proxy without
+    the OAuth contract. Same opt-out shape as the rest of the s6 boolean
+    envs (``HERMES_DASHBOARD``, ``HERMES_DASHBOARD_TUI``).
+    """
+    subprocess.run(
+        ["docker", "run", "-d", "--name", container_name,
+         "-e", "HERMES_DASHBOARD=1",
+         "-e", "HERMES_DASHBOARD_HOST=0.0.0.0",
+         "-e", "HERMES_DASHBOARD_INSECURE=1",
+         built_image, "sleep", "120"],
+        check=True, capture_output=True, timeout=30,
+    )
+    status = _fetch_api_status(container_name)
+    assert status.get("auth_required") is False, (
+        "HERMES_DASHBOARD_INSECURE=1 must disable the auth gate (explicit "
+        f"opt-in for trusted-LAN deployments). Got: {status!r}"
+    )
diff --git a/tests/test_docker_home_override_scripts.py b/tests/test_docker_home_override_scripts.py
index d51ae06e17a..0ad9f61c9ad 100644
--- a/tests/test_docker_home_override_scripts.py
+++ b/tests/test_docker_home_override_scripts.py
@@ -13,3 +13,59 @@ def test_dashboard_run_resets_home_before_dropping_privileges() -> None:
     assert "#!/command/with-contenv sh" in text
     assert "export HOME=/opt/data" in text
     assert "exec s6-setuidgid hermes hermes dashboard" in text
+
+
+def test_dashboard_run_does_not_derive_insecure_from_bind_host() -> None:
+    """The s6 dashboard run script MUST NOT auto-add ``--insecure`` based on
+    ``HERMES_DASHBOARD_HOST``. Doing so disables the OAuth auth gate on
+    every non-loopback bind even when an auth provider is registered —
+    the exact regression that exposed every wildcard-subdomain agent
+    dashboard publicly until early 2026.
+
+    The opt-in is now explicit: ``HERMES_DASHBOARD_INSECURE=1`` (truthy).
+    The auth gate is the authority on whether non-loopback binds are safe.
+    """
+    text = DASHBOARD_RUN.read_text(encoding="utf-8")
+
+    # No legacy host-derived flip.
+    assert '127.0.0.1|localhost' not in text, (
+        "Run script still derives --insecure from the bind host. The gate "
+        "is the authority now — opt in via HERMES_DASHBOARD_INSECURE instead."
+    )
+    assert 'case "$dash_host" in' not in text, (
+        "Legacy host-derived --insecure case-statement is back."
+    )
+
+    # New opt-in env var present.
+    assert "HERMES_DASHBOARD_INSECURE" in text, (
+        "Explicit HERMES_DASHBOARD_INSECURE opt-in is missing."
+    )
+    # Truthy values aligned with the rest of the s6 scripts
+    # (HERMES_DASHBOARD, HERMES_DASHBOARD_TUI). Each value must be a whole
+    # alternative of the HERMES_DASHBOARD_INSECURE case block: a bare
+    # substring check is vacuous ("1" always matches the default port 9119).
+    lines = text.splitlines()
+    case_start = next(
+        (
+            idx
+            for idx, line in enumerate(lines)
+            if line.lstrip().startswith("case ")
+            and "HERMES_DASHBOARD_INSECURE" in line
+        ),
+        None,
+    )
+    assert case_start is not None, (
+        "No case statement switches on HERMES_DASHBOARD_INSECURE."
+    )
+    accepted = set()
+    for line in lines[case_start + 1:]:
+        stripped = line.strip()
+        if stripped == "esac":
+            break
+        if ")" in stripped and not stripped.startswith("#"):
+            pattern = stripped.split(")", 1)[0]
+            accepted.update(alt.strip() for alt in pattern.split("|"))
+    for truthy in ("1", "true", "TRUE", "True", "yes", "YES", "Yes"):
+        assert truthy in accepted, (
+            f"HERMES_DASHBOARD_INSECURE should accept truthy value {truthy!r}"
+        )
diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md
index bb049fac8d3..0350b07a463 100644
--- a/website/docs/user-guide/docker.md
+++ b/website/docs/user-guide/docker.md
@@ -101,8 +101,29 @@ The entrypoint starts `hermes dashboard` in the background (running as the non-r
 | `HERMES_DASHBOARD_HOST` | Bind address for the dashboard HTTP server | `127.0.0.1` |
 | `HERMES_DASHBOARD_PORT` | Port for the dashboard HTTP server | `9119` |
 | `HERMES_DASHBOARD_TUI` | Set to `1` to expose the in-browser Chat tab (embedded `hermes --tui` via PTY/WebSocket) | *(unset)* |
+| `HERMES_DASHBOARD_INSECURE` | Set to `1` (or `true` / `yes`) to bind without the OAuth auth gate. Only use on trusted networks behind a reverse proxy without the OAuth contract — the dashboard exposes API keys and session data | *(unset — gate enforced when a `DashboardAuthProvider` is registered)* |
 
-By default, the dashboard stays on loopback to avoid exposing the unauthenticated web surface over the network. To publish it intentionally, set `HERMES_DASHBOARD_HOST=0.0.0.0` and configure your own trusted network boundary/reverse proxy. In that case you must explicitly add `--insecure` behavior by passing host/flags in your command path (the entrypoint no longer auto-enables insecure mode).
+By default, the dashboard stays on loopback (`127.0.0.1`) to avoid exposing
+the web surface over the network. To publish it intentionally, set
+`HERMES_DASHBOARD_HOST=0.0.0.0`. The dashboard's OAuth auth gate engages
+automatically whenever:
+
+1. The bind host is non-loopback, **and**
+2. A `DashboardAuthProvider` plugin is registered.
+
+The bundled `dashboard_auth/nous` provider activates whenever
+`HERMES_DASHBOARD_OAUTH_CLIENT_ID` is set (see
+[Web Dashboard → Authentication](features/web-dashboard.md)). With the
+gate engaged, browser callers are redirected to the configured portal's
+OAuth flow before they can reach any protected route.
+
+If no provider is registered and the bind is non-loopback, the dashboard
+**fails closed at startup** with a specific error pointing at the
+missing env var. To opt out of the gate explicitly — for a trusted-LAN
+deployment behind your own reverse proxy without the OAuth contract —
+set `HERMES_DASHBOARD_INSECURE=1`. This re-enables the legacy "no auth,
+loud warning" mode and is the only path that disables the gate; the bind
+host does not implicitly determine `--insecure` anymore.
 
 :::note
 The dashboard runs as a supervised s6 service inside the container. If
diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md
index 0f3dde59dd2..b3714bc7922 100644
--- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md
+++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md
@@ -80,8 +80,28 @@ docker run -d \
 | `HERMES_DASHBOARD_HOST` | dashboard HTTP 服务器的绑定地址 | `127.0.0.1` |
 | `HERMES_DASHBOARD_PORT` | dashboard HTTP 服务器的端口 | `9119` |
 | `HERMES_DASHBOARD_TUI` | 设为 `1` 以启用浏览器内 Chat 标签页(通过 PTY/WebSocket 嵌入 `hermes --tui`) | *(未设置)* |
+| `HERMES_DASHBOARD_INSECURE` | 设为 `1`(或 `true` / `yes`)以在不启用 OAuth 鉴权门控的情况下绑定。仅在可信网络(且通过没有 OAuth 契约的反向代理时)使用——dashboard 会暴露 API 密钥与会话数据 | *(未设置——当注册了 `DashboardAuthProvider` 时启用门控)* |
 
-默认情况下,dashboard 保持在回环地址,以避免将未经身份验证的 Web 界面暴露到网络。若要有意发布,请设置 `HERMES_DASHBOARD_HOST=0.0.0.0` 并配置你自己的可信网络边界/反向代理。在这种情况下,你必须通过命令路径中的 host/flags 显式添加 `--insecure` 行为(入口点不再自动启用不安全模式)。
+默认情况下,dashboard 保持在回环地址(`127.0.0.1`),以避免将
+Web 界面暴露到网络。若要有意发布,请设置
+`HERMES_DASHBOARD_HOST=0.0.0.0`。当以下两项同时满足时,
+dashboard 的 OAuth 鉴权门控会自动启用:
+
+1. 绑定地址为非回环地址,**且**
+2. 注册了一个 `DashboardAuthProvider` 插件。
+
+捆绑的 `dashboard_auth/nous` 提供者会在设置
+`HERMES_DASHBOARD_OAUTH_CLIENT_ID` 时自动激活(参见
+[Web Dashboard → 鉴权](features/web-dashboard.md))。门控启用后,
+浏览器调用方会先被重定向到所配置门户的 OAuth 流,然后才能
+访问任何受保护路由。
+
+如果未注册提供者且绑定为非回环地址,dashboard **会在启动时
+失败关闭**,并给出指向缺失环境变量的具体错误信息。要显式
+退出门控——用于不使用 OAuth 契约、通过你自己的反向代理部署
+在可信局域网中的场景——请设置 `HERMES_DASHBOARD_INSECURE=1`。
+这会恢复旧的“无鉴权,但发出告警”模式,也是唯一可以禁用门控的
+路径;绑定地址不再隐式决定 `--insecure`。
 
 :::note
 dashboard 在容器内作为受监管的 s6 服务运行。如果