From eb51c180e6484ec15809d04c25a8115e6e48dc3c Mon Sep 17 00:00:00 2001 From: teknium1 <127238744+teknium1@users.noreply.github.com> Date: Sun, 21 Jun 2026 17:32:56 -0700 Subject: [PATCH] fix(docker): replace dashboard --insecure with basic-auth provider The s6 dashboard entrypoint and docker integration tests relied on HERMES_DASHBOARD_INSECURE=1 to bring up a 0.0.0.0 dashboard with no auth provider. With --insecure now a no-op (auth gate mandatory on non-loopback binds), that path fails closed. - s6 dashboard/run: drop --insecure derivation; warn that the env is a no-op and point operators at HERMES_DASHBOARD_BASIC_AUTH_* / OAuth. - docker tests: supervision tests now register the bundled basic password provider (HERMES_DASHBOARD_BASIC_AUTH_USERNAME/_PASSWORD) so the gate has a provider and the dashboard binds. Rewrote the insecure-opt-out test to assert fail-closed (dashboard does NOT serve) instead of gate-bypass. - docs (en + zh-Hans): HERMES_DASHBOARD_INSECURE documented as deprecated no-op; basic-auth is the zero-infra way to authenticate a containerized public dashboard. --- docker/s6-rc.d/dashboard/run | 35 ++++++----- tests/docker/test_dashboard.py | 62 ++++++++++--------- website/docs/user-guide/docker.md | 8 +-- .../current/user-guide/docker.md | 16 ++--- 4 files changed, 64 insertions(+), 57 deletions(-) diff --git a/docker/s6-rc.d/dashboard/run b/docker/s6-rc.d/dashboard/run index d6fd29cafd3..2eb0cf9cb18 100755 --- a/docker/s6-rc.d/dashboard/run +++ b/docker/s6-rc.d/dashboard/run @@ -30,26 +30,27 @@ cd /opt/data dash_host="${HERMES_DASHBOARD_HOST:-0.0.0.0}" dash_port="${HERMES_DASHBOARD_PORT:-9119}" -# `--insecure` is opt-in via HERMES_DASHBOARD_INSECURE. The dashboard's -# OAuth auth gate engages automatically on non-loopback binds when a -# DashboardAuthProvider is registered (e.g. the bundled dashboard_auth/nous -# provider, which auto-registers when HERMES_DASHBOARD_OAUTH_CLIENT_ID is -# set). 
If no provider is registered, start_server fails closed with a -# specific operator-facing error. +# The dashboard's auth gate engages automatically on non-loopback binds and +# REQUIRES a DashboardAuthProvider to be registered, else start_server fails +# closed. Two zero-infra ways to satisfy it in a container: +# • Password: set HERMES_DASHBOARD_BASIC_AUTH_USERNAME + _PASSWORD (bundled +# dashboard_auth/basic provider — no external IDP). +# • OAuth: set HERMES_DASHBOARD_OAUTH_CLIENT_ID (bundled nous provider). # -# This used to derive --insecure from the bind host ("anything non-loopback -# implies insecure"), but that predates the OAuth gate and silently -# disabled it on every container-deployed dashboard. The gate is now the -# authority; operators on trusted LANs / behind a reverse proxy without -# the OAuth contract opt in explicitly. -insecure="" +# HERMES_DASHBOARD_INSECURE no longer disables the gate (June 2026 hardening: +# unauthenticated public dashboards were the entry point for the MCP-config +# persistence campaign). It is accepted but ignored; warn if set so operators +# migrate to a real provider. case "${HERMES_DASHBOARD_INSECURE:-}" in - 1|true|TRUE|True|yes|YES|Yes) insecure="--insecure" ;; + 1|true|TRUE|True|yes|YES|Yes) + echo "[dashboard] HERMES_DASHBOARD_INSECURE no longer disables the auth gate." >&2 + echo "[dashboard] A non-loopback dashboard requires an auth provider:" >&2 + echo "[dashboard] set HERMES_DASHBOARD_BASIC_AUTH_USERNAME + _PASSWORD (password)" >&2 + echo "[dashboard] or HERMES_DASHBOARD_OAUTH_CLIENT_ID (OAuth)." >&2 + ;; esac # Skip the drop when already non-root. 
-# shellcheck disable=SC2086 # word-splitting of $insecure is intentional -[ "$(id -u)" = 0 ] || exec hermes dashboard --host "$dash_host" --port "$dash_port" --no-open $insecure -# shellcheck disable=SC2086 # word-splitting of $insecure is intentional +[ "$(id -u)" = 0 ] || exec hermes dashboard --host "$dash_host" --port "$dash_port" --no-open exec s6-setuidgid hermes hermes dashboard \ - --host "$dash_host" --port "$dash_port" --no-open $insecure + --host "$dash_host" --port "$dash_port" --no-open diff --git a/tests/docker/test_dashboard.py b/tests/docker/test_dashboard.py index 91dc1051b99..800414f58ee 100644 --- a/tests/docker/test_dashboard.py +++ b/tests/docker/test_dashboard.py @@ -95,7 +95,8 @@ def test_dashboard_slot_reports_up_when_enabled( # would fail closed and the slot would never come up. Pin the # explicit insecure opt-in to keep this test focused on the s6 # supervision contract, not the auth gate. - "-e", "HERMES_DASHBOARD_INSECURE=1", + "-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin", + "-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw", built_image, "sleep", "120"], check=True, capture_output=True, timeout=30, ) @@ -122,10 +123,12 @@ def test_dashboard_opt_in_starts( subprocess.run( ["docker", "run", "-d", "--name", container_name, "-e", "HERMES_DASHBOARD=1", - # Default bind is 0.0.0.0; pin insecure opt-in so the auth gate - # doesn't fail-closed before the process can come up. See - # test_dashboard_slot_reports_up_when_enabled for the full rationale. - "-e", "HERMES_DASHBOARD_INSECURE=1", + # Default bind is 0.0.0.0, which engages the auth gate. Register the + # bundled basic password provider so the gate has a provider and the + # dashboard binds (vs fail-closed). Keeps the test focused on s6 + # supervision, not auth. 
+ "-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin", + "-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw", built_image, "sleep", "120"], check=True, capture_output=True, timeout=30, ) @@ -145,10 +148,11 @@ def test_dashboard_port_override( subprocess.run( ["docker", "run", "-d", "--name", container_name, "-e", "HERMES_DASHBOARD=1", "-e", "HERMES_DASHBOARD_PORT=9120", - # Default bind is 0.0.0.0; pin insecure opt-in so the auth gate - # doesn't fail-closed before the port is bound. See + # Default bind is 0.0.0.0; register the basic password provider so + # the auth gate has a provider and the dashboard binds. See # test_dashboard_slot_reports_up_when_enabled for the full rationale. - "-e", "HERMES_DASHBOARD_INSECURE=1", + "-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin", + "-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw", built_image, "sleep", "120"], check=True, capture_output=True, timeout=30, ) @@ -179,11 +183,12 @@ def test_dashboard_restarts_after_crash( subprocess.run( ["docker", "run", "-d", "--name", container_name, "-e", "HERMES_DASHBOARD=1", - # Default bind is 0.0.0.0; pin insecure opt-in so the auth gate - # doesn't fail-closed before the supervised dashboard can come up. + # Default bind is 0.0.0.0; register the basic password provider so + # the auth gate has a provider and the supervised dashboard binds. # See test_dashboard_slot_reports_up_when_enabled for the full # rationale. 
- "-e", "HERMES_DASHBOARD_INSECURE=1", + "-e", "HERMES_DASHBOARD_BASIC_AUTH_USERNAME=admin", + "-e", "HERMES_DASHBOARD_BASIC_AUTH_PASSWORD=test-dashboard-pw", built_image, "sleep", "120"], check=True, capture_output=True, timeout=30, ) @@ -383,17 +388,15 @@ def test_dashboard_oauth_gate_engages_on_non_loopback_bind( ) -def test_dashboard_insecure_env_var_opts_out_of_gate( +def test_dashboard_insecure_env_var_no_longer_bypasses_gate( built_image: str, container_name: str, ) -> None: - """``HERMES_DASHBOARD_INSECURE=1`` re-enables the legacy no-gate mode - for operators running on trusted LANs behind a reverse proxy without - the OAuth contract. Same opt-out shape as the rest of the s6 boolean - envs (e.g. ``HERMES_DASHBOARD``). - - With the gate off, ``/api/status`` (a public endpoint under the - legacy ``_SESSION_TOKEN`` middleware) returns 200 with the - ``auth_required: false`` body — proves the gate is bypassed. + """``HERMES_DASHBOARD_INSECURE=1`` NO LONGER disables the auth gate + (June 2026 hardening). With insecure set on a 0.0.0.0 bind and NO auth + provider registered, start_server fails closed — the dashboard never + binds, so ``/api/status`` is unreachable. This proves the unauthenticated + public-dashboard escape hatch is gone: there is no env that serves the + dashboard on a public bind without an auth provider. """ subprocess.run( ["docker", "run", "-d", "--name", container_name, @@ -403,13 +406,16 @@ def test_dashboard_insecure_env_var_opts_out_of_gate( built_image, "sleep", "120"], check=True, capture_output=True, timeout=30, ) - status_code, body = _http_probe(container_name, "/api/status") - assert status_code == 200, ( - f"/api/status should return 200 with the auth gate disabled; " - f"got {status_code} body={body!r}" + # Fail-closed: the dashboard process must NOT successfully serve. Probe + # for a few seconds; /api/status should never become reachable because + # start_server raised SystemExit before binding. 
+ ok, _ = _poll( + container_name, + "curl -fsS -m 2 http://127.0.0.1:9119/api/status >/dev/null 2>&1", + deadline_s=12.0, ) - status = json.loads(body) - assert status.get("auth_required") is False, ( - "HERMES_DASHBOARD_INSECURE=1 must disable the auth gate (explicit " - f"opt-in for trusted-LAN deployments). Got: {status!r}" + assert not ok, ( + "Dashboard must NOT serve on a public bind with --insecure and no " + "auth provider — the gate fails closed. /api/status became reachable, " + "meaning the unauthenticated escape hatch is still open." ) diff --git a/website/docs/user-guide/docker.md b/website/docs/user-guide/docker.md index eb568182570..c4b8c73908b 100644 --- a/website/docs/user-guide/docker.md +++ b/website/docs/user-guide/docker.md @@ -121,7 +121,7 @@ The dashboard is supervised by s6 — if it crashes, `s6-supervise` restarts it | `HERMES_DASHBOARD` | Set to `1` (or `true` / `yes`) to enable the supervised dashboard service | *(unset — service is registered but stays down)* | | `HERMES_DASHBOARD_HOST` | Bind address for the dashboard HTTP server | `0.0.0.0` | | `HERMES_DASHBOARD_PORT` | Port for the dashboard HTTP server | `9119` | -| `HERMES_DASHBOARD_INSECURE` | Set to `1` (or `true` / `yes`) to bind without the OAuth auth gate. Only use on trusted networks behind a reverse proxy without the OAuth contract — the dashboard exposes API keys and session data | *(unset — gate enforced when a `DashboardAuthProvider` is registered)* | +| `HERMES_DASHBOARD_INSECURE` | **Deprecated / no-op.** Formerly bypassed the auth gate; as of the June 2026 hardening it no longer disables authentication. A non-loopback bind always requires an auth provider | *(ignored — configure a provider instead)* | The dashboard inside the container defaults to binding `0.0.0.0` — without it, the published `-p 9119:9119` port would not be reachable from the host. To restrict the bind to container loopback (for sidecar / reverse-proxy setups), set `HERMES_DASHBOARD_HOST=127.0.0.1`. 
@@ -138,10 +138,10 @@ There are three bundled ways to satisfy the second condition: Whichever you choose, the gate redirects callers to a login page before they can reach any protected route. See [Web Dashboard → Authentication](features/web-dashboard.md#authentication-gated-mode) for all three providers. -If no provider is registered and the bind is non-loopback, the dashboard **fails closed at startup** with a specific error pointing at the missing env var. The `HERMES_DASHBOARD_INSECURE=1` escape hatch disables the gate entirely (the bind host alone never implies `--insecure`), but it serves an unauthenticated dashboard — configure a provider instead unless you have your own auth layer in front. +If no provider is registered and the bind is non-loopback, the dashboard **fails closed at startup** with a specific error pointing at the missing env var. There is no longer an escape hatch that serves the dashboard unauthenticated on a public bind: `HERMES_DASHBOARD_INSECURE=1` is now a deprecated no-op (it logs a warning and is ignored). Configure a provider, or bind `HERMES_DASHBOARD_HOST=127.0.0.1` and reach the dashboard over an SSH tunnel / Tailscale instead. -:::warning `HERMES_DASHBOARD_INSECURE=1` exposes API keys -Opting out of the OAuth gate serves the dashboard's API surface (including model keys and session data) to anyone who can reach the published port. Only enable it when you have your own auth layer in front, or on a trusted LAN you fully control. +:::warning Why `--insecure` no longer bypasses the gate +An unauthenticated public dashboard was the entry point for the June 2026 MCP-config persistence campaign: internet scanners reached exposed dashboards (and OpenAI API servers) and drove the agent into planting an SSH-key backdoor. The auth gate is now mandatory on every non-loopback bind. For a trusted-LAN / homelab box, the bundled username/password provider (`HERMES_DASHBOARD_BASIC_AUTH_USERNAME` + `_PASSWORD`) is the zero-infra way to satisfy it.
::: Running the dashboard as a separate container **is** supported when that container shares the host PID and network namespace (e.g. `network_mode: host`, as the repo's own `docker-compose.yml` does — see its `dashboard` service). Its gateway-liveness detection requires a shared PID namespace with the gateway process, so the limitation only applies to dashboards run in isolated bridge-network containers without a shared PID namespace. diff --git a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md index 8ab80266e3b..8b1609ef12b 100644 --- a/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md +++ b/website/i18n/zh-Hans/docusaurus-plugin-content-docs/current/user-guide/docker.md @@ -80,7 +80,7 @@ Dashboard 由 s6 监管:若进程崩溃,`s6-supervise` 会在短暂退避后 | `HERMES_DASHBOARD` | 设为 `1`(或 `true` / `yes`)以启用受监管的 dashboard 服务 | *(未设置——服务已注册但保持关闭)* | | `HERMES_DASHBOARD_HOST` | dashboard HTTP 服务器的绑定地址 | `0.0.0.0` | | `HERMES_DASHBOARD_PORT` | dashboard HTTP 服务器的端口 | `9119` | -| `HERMES_DASHBOARD_INSECURE` | 设为 `1`(或 `true` / `yes`)以在不启用 OAuth 鉴权门控的情况下绑定。仅在可信网络(且通过没有 OAuth 契约的反向代理时)使用——dashboard 会暴露 API 密钥与会话数据 | *(未设置——当注册了 `DashboardAuthProvider` 时启用门控)* | +| `HERMES_DASHBOARD_INSECURE` | **已弃用 / 空操作。** 以前用于绕过鉴权门控;自 2026 年 6 月的安全加固起,它不再禁用鉴权。任何非回环绑定都必须配置鉴权提供方 | *(被忽略——请改为配置提供方)* | 容器内的 dashboard 默认绑定 `0.0.0.0`,否则发布的 `-p 9119:9119` 端口将无法从宿主机访问。若你要把它限制在容器回环地址(例如 sidecar / 反向代理拓扑),请显式设置 `HERMES_DASHBOARD_HOST=127.0.0.1`。 @@ -98,14 +98,14 @@ Dashboard 由 s6 监管:若进程崩溃,`s6-supervise` 会在短暂退避后 无论选择哪种,调用方在访问受保护路由前都会先被重定向到登录页。完整说明见 [Web Dashboard → 鉴权](features/web-dashboard.md)。 如果未注册提供者且绑定为非回环地址,dashboard **会在启动时 -失败关闭**,并给出指向缺失环境变量的具体错误信息。要显式 -退出门控——用于不使用 OAuth 契约、通过你自己的反向代理部署 -在可信局域网中的场景——请设置 `HERMES_DASHBOARD_INSECURE=1`。 -这会恢复旧的“无鉴权,但发出告警”模式,也是唯一可以禁用门控的 -路径;绑定地址不再隐式决定 `--insecure`。 +失败关闭**,并给出指向缺失环境变量的具体错误信息。现在已不再 +存在以无鉴权方式在公网绑定上提供 dashboard 
的“逃生通道”: +`HERMES_DASHBOARD_INSECURE=1` 现在是一个已弃用的空操作(它会 +打印告警并被忽略)。请改为配置鉴权提供方,或设置 +`HERMES_DASHBOARD_HOST=127.0.0.1` 并通过 SSH 隧道 / Tailscale 访问。 -:::warning `HERMES_DASHBOARD_INSECURE=1` 会暴露 API 密钥 -关闭鉴权门控会让任何能访问已发布端口的人都能看到 dashboard 的 API 面(包括模型密钥与会话数据)。除非你前面已经有自己的鉴权层,或它只运行在你完全信任的局域网内,否则不要启用它。 +:::warning 为什么 `--insecure` 不再绕过鉴权门控 +无鉴权的公网 dashboard 是 2026 年 6 月 MCP 配置持久化攻击活动的入口:互联网扫描器访问到暴露的 dashboard(以及 OpenAI API 服务器),诱导 agent 植入 SSH 密钥后门。现在每个非回环绑定都强制启用鉴权门控。对于可信局域网 / homelab 主机,内置的用户名/密码提供方(`HERMES_DASHBOARD_BASIC_AUTH_USERNAME` + `_PASSWORD`)是满足该要求的零基础设施方式。 ::: 当独立的 dashboard 容器与宿主机共享 PID 与网络命名空间时(例如 `network_mode: host`,正如仓库自带的 `docker-compose.yml` 中的 `dashboard` 服务那样),**是**支持将 dashboard 作为独立容器运行的。其 gateway 存活检测需要与 gateway 进程共享 PID 命名空间,因此该限制仅适用于在隔离的 bridge 网络容器中、且未共享 PID 命名空间的 dashboard。