From c6febe37658d34d1c5de4f44136f2dd71daa0e1b Mon Sep 17 00:00:00 2001
From: Ben
Date: Thu, 21 May 2026 14:20:54 +1000
Subject: [PATCH] ci(docker): add hadolint + shellcheck for container build inputs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 0.5 of the s6-overlay supervision plan. Catches Dockerfile and
shell-script regressions that the behavioral docker-publish smoke test
can't surface — unquoted variable expansions, silently-failing RUN
commands, missing apt-get clean, etc.

Both lint clean against the current (tini) Dockerfile + entrypoint.sh at
the configured thresholds (hadolint: warning, shellcheck: error). Each
ignore in .hadolint.yaml carries a one-line justification; the
shellcheck severity floor is documented in the workflow file.

Refs: docs/plans/2026-05-07-s6-overlay-dynamic-subagent-gateways.md
---
 .github/workflows/docker-lint.yml | 68 +++++++++++++++++++++++++++++++
 .hadolint.yaml                    | 37 +++++++++++++++++
 2 files changed, 105 insertions(+)
 create mode 100644 .github/workflows/docker-lint.yml
 create mode 100644 .hadolint.yaml

diff --git a/.github/workflows/docker-lint.yml b/.github/workflows/docker-lint.yml
new file mode 100644
index 00000000000..f1673813e99
--- /dev/null
+++ b/.github/workflows/docker-lint.yml
@@ -0,0 +1,68 @@
+name: Docker / shell lint
+
+# Lints the container build inputs: Dockerfile (via hadolint) and any shell
+# scripts under docker/ (via shellcheck). These catch the class of regression
+# the behavioral docker-publish smoke test can't — unquoted variable
+# expansions, silently-failing RUN commands, etc.
+#
+# Rules and ignores are documented in .hadolint.yaml at the repo root.
+# shellcheck severity is pinned to `error` so SC1091-style "can't follow
+# sourced script" info-level warnings don't fail the job — the .venv
+# activate script doesn't exist at lint time.
+
+on:
+  push:
+    branches: [main]
+    paths:
+      - Dockerfile
+      - docker/**
+      - .hadolint.yaml
+      - .github/workflows/docker-lint.yml
+  pull_request:
+    branches: [main]
+    paths:
+      - Dockerfile
+      - docker/**
+      - .hadolint.yaml
+      - .github/workflows/docker-lint.yml
+
+permissions:
+  contents: read
+
+concurrency:
+  group: docker-lint-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  hadolint:
+    name: Lint Dockerfile (hadolint)
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: hadolint
+        uses: hadolint/hadolint-action@54c9adbab1582c2ef04b2016b760714a4bfde3cf # v3.1.0
+        with:
+          dockerfile: Dockerfile
+          config: .hadolint.yaml
+          failure-threshold: warning
+
+  shellcheck:
+    name: Lint docker/ shell scripts (shellcheck)
+    runs-on: ubuntu-latest
+    timeout-minutes: 5
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+
+      - name: shellcheck
+        uses: ludeeus/action-shellcheck@00cae500b08a931fb5698e11e79bfbd38e612a38 # v2.0.0
+        env:
+          # Floor = error so info-level SC1091 (can't follow the venv activate
+          # script, absent at lint time) can't fail the job. NOTE(review): this
+          # also hides info-level SC2086 (unquoted expansions); confirm intent.
+          SHELLCHECK_OPTS: --severity=error
+        with:
+          scandir: ./docker
diff --git a/.hadolint.yaml b/.hadolint.yaml
new file mode 100644
index 00000000000..295211278a7
--- /dev/null
+++ b/.hadolint.yaml
@@ -0,0 +1,37 @@
+# hadolint configuration for the Hermes Agent Dockerfile.
+# See https://github.com/hadolint/hadolint#configure for rules.
+#
+# We want hadolint to surface NEW Dockerfile lint regressions, but we
+# don't want to rewrite the existing image to silence rules that are
+# either intentional or pragmatic tradeoffs for this project. Each
+# ignore below has a one-line justification.
+failure-threshold: warning
+
+ignored:
+  # Pin versions in apt get install. We intentionally don't pin common
+  # tools (curl, git, openssh-client, etc.) — security updates flow in
+  # via the periodic base-image rebuild, and pinning would lock us to
+  # superseded patch releases. Same rationale as nearly every distro-
+  # base official image (python, node, debian).
+  - DL3008
+  # Use WORKDIR to switch to a directory. The image uses `(cd web && …)`
+  # / `(cd ../ui-tui && …)` inline subshells for one-off build steps
+  # because they don't affect later RUN commands; promoting them to
+  # full WORKDIR switches with restores would obscure intent.
+  - DL3003
+  # Multiple consecutive RUN instructions. The `touch README.md` + `uv
+  # sync` split is intentional — `touch` is cheap, `uv sync` is the
+  # expensive layer-cached step we want isolated, and merging them
+  # would invalidate the cache for trivial changes.
+  - DL3059
+  # Last USER should not be root. The entrypoint is responsible for
+  # gosu-dropping to the hermes user; running as root is required so
+  # usermod/groupmod can remap UIDs per HERMES_UID at runtime. Phase 2
+  # of the s6-overlay migration preserves this contract — /init runs
+  # as root, individual services drop via s6-setuidgid.
+  - DL3002
+
+# Restrict FROM images to these registries (DL3026); not a digest-pin check.
+trustedRegistries:
+  - docker.io
+  - ghcr.io