fix(s6): dot-prefix gateway staging dir so svscan ignores it mid-build (#54834)
Some checks are pending
CI / Detect affected areas (push) Waiting to run
CI / Python tests (push) Blocked by required conditions
CI / Python lints (push) Blocked by required conditions
CI / TypeScript (push) Blocked by required conditions
CI / Docs Site (push) Blocked by required conditions
CI / Deny unrelated histories (push) Blocked by required conditions
CI / Check contributors (push) Blocked by required conditions
CI / Check uv.lock (push) Blocked by required conditions
CI / Lint Docker scripts (push) Blocked by required conditions
CI / Build&Test Docker image (push) Blocked by required conditions
CI / Supply-chain scan (push) Blocked by required conditions
CI / OSV scan (push) Waiting to run
CI / All required checks pass (push) Blocked by required conditions
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run

The register path builds each profile-gateway slot in a sibling staging
dir under /run/service (the scandir s6-svscan watches), then atomically
renames it to the live gateway-<profile> name. The staging dir was named
gateway-<profile>.tmp — a NON-dotfile — so a concurrent `s6-svscanctl -a`
rescan (fired by the cont-init reconciler registering gateway-default, or
by a sibling register) would supervise the half-built slot the moment it
had a valid type/run: s6-supervise spawns AS ROOT and mkdirs supervise/
root-owned 0700, then the in-flight _seed_supervise_skeleton early-returns
on the now-existing supervise/ and the next `mkdir supervise/event` hits
PermissionError.

That is the arm64-only CI flake on
test_s6_unregister_removes_service_dir_in_live_container
(PermissionError: /run/service/gateway-phase3test.tmp/supervise/event) —
arm64-only because the native-arm runner's wider scheduling jitter lets
the rescan land inside the ~ms seed window; amd64 ran 30/30 clean.

Fix: dot-prefix the staging dir (.gateway-<profile>.tmp) in both register
paths (S6ServiceManager.register_profile_gateway and
container_boot._register_service). s6-svscan skips any scandir entry whose
name begins with '.', so the half-built slot can never be supervised
mid-build. The atomic rename to the dotless live name is unchanged.

Verified on a real s6 image (amd64): a non-dotted staging dir is picked up
by an svscanctl -a rescan (SUPERVISED owner=root) while a dot-prefixed one
is ignored (NOT-SUPERVISED). Added a docker-harness regression test that
asserts both, plus a unit test that the staging dir is dot-prefixed.
This commit is contained in:
Ben Barclay 2026-06-29 21:33:00 +10:00 committed by GitHub
parent dbad6d47d3
commit f53ba9bb54
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 175 additions and 9 deletions

View file

@ -379,7 +379,16 @@ def _register_service(scandir: Path, profile: str, *, start: bool) -> None:
validate_profile_name(profile)
service_dir = scandir / f"gateway-{profile}"
tmp_dir = service_dir.with_name(service_dir.name + ".tmp")
# Dot-prefix the staging dir so s6-svscan skips it while half-built
# (s6-svscan ignores scandir entries whose name starts with ".").
# A non-dotted ``.tmp`` staging name is supervised AS ROOT by any
# concurrent ``s6-svscanctl -a`` rescan the moment it has a valid
# ``type``/``run``, creating a root-owned ``supervise/`` that makes
# ``_seed_supervise_skeleton`` EACCES — see the matching comment in
# ``S6ServiceManager.register_profile_gateway``. The atomic
# ``tmp_dir.replace(service_dir)`` below renames to the dotless live
# name, so the published slot is unchanged.
tmp_dir = service_dir.with_name("." + service_dir.name + ".tmp")
# Wipe any leftover tmp from a previous interrupted run.
if tmp_dir.exists():

View file

@ -965,9 +965,23 @@ class S6ServiceManager:
)
# Build the service directory atomically: write to a sibling
# temp dir, then rename. Avoids s6-svscan observing a half-
# populated directory on a fast rescan.
tmp_dir = svc_dir.with_name(svc_dir.name + ".tmp")
# temp dir, then rename. The staging name is DOT-PREFIXED
# (``.gateway-<profile>.tmp``) so s6-svscan ignores it while it
# is half-built: s6-svscan skips any scandir entry whose name
# begins with ``.``. Without the dot prefix, a concurrent
# ``s6-svscanctl -a`` rescan (fired by the cont-init reconciler
# registering ``gateway-default``, or by a sibling register)
# would supervise the still-being-seeded ``.tmp`` slot: it has a
# valid ``type``/``run`` by that point, so s6-supervise spawns
# AS ROOT and mkdir's ``supervise/`` root-owned 0700 — then this
# process's ``_seed_supervise_skeleton`` early-returns on the now-
# existing ``supervise/`` and the next ``mkdir supervise/event``
# hits EACCES. That is the arm64-only CI flake on
# test_s6_unregister_removes_service_dir_in_live_container
# (the wider scheduling jitter on the native arm64 runner lets the
# rescan land inside the ~ms seed window). The atomic rename to
# the dotless live name below is unaffected.
tmp_dir = svc_dir.with_name("." + svc_dir.name + ".tmp")
if tmp_dir.exists():
shutil.rmtree(tmp_dir, ignore_errors=True)
tmp_dir.mkdir(parents=True)

View file

@ -109,3 +109,93 @@ def test_s6_unregister_removes_service_dir_in_live_container(
"print(S6ServiceManager().list_profile_gateways())"
))
assert "phase3test" not in r.stdout
# Shell probe: build a service-shaped staging dir under the live scandir
# with a given NAME, fire a real `s6-svscanctl -a` rescan, wait, and
# report whether s6-svscan supervised it (which would create a root-owned
# supervise/ dir). Used to prove the dot-prefixed staging name is INVISIBLE
# to a concurrent rescan while a non-dotted one is not.
#
# Echoes one of: SUPERVISED / NOT-SUPERVISED, plus the supervise/ owner.
_SVSCAN_PICKUP_PROBE = r"""
set -eu
NAME="$1"
SCANDIR=/run/service
DIR="$SCANDIR/$NAME"
rm -rf "$DIR"
mkdir -p "$DIR"
printf 'longrun\n' > "$DIR/type"
printf '#!/command/execlineb -P\n/command/s6-sleep 600\n' > "$DIR/run"
chmod 755 "$DIR/run"
# Trigger a full rescan, exactly as register/reconcile do.
/command/s6-svscanctl -a "$SCANDIR"
# Give s6-svscan time to act (its scan is async; 200ms is the manager's
# own settle delay, use 2s here to be comfortably past it on any arch).
/command/s6-sleep 2
if [ -d "$DIR/supervise" ]; then
owner=$(stat -c '%U' "$DIR/supervise" 2>/dev/null || echo '?')
echo "SUPERVISED owner=$owner"
else
echo "NOT-SUPERVISED"
fi
# Best-effort teardown so the probe leaves no live supervisor behind.
/command/s6-svc -d "$DIR" 2>/dev/null || true
/command/s6-svscanctl -an "$SCANDIR" 2>/dev/null || true
/command/s6-sleep 1
rm -rf "$DIR" 2>/dev/null || true
"""
def test_s6_dotfile_staging_dir_is_ignored_by_svscan_rescan(
built_image: str, container_name: str,
) -> None:
"""Regression for the arm64 register-seed race.
The register path builds the slot in a sibling staging dir and then
atomically renames it to the live ``gateway-<profile>`` name. That
staging dir lives INSIDE the scandir s6-svscan watches, so its NAME
decides whether a concurrent ``s6-svscanctl -a`` rescan (fired by the
cont-init reconciler registering ``gateway-default``, or by another
register) supervises the half-built slot.
- A NON-dotted name (the old ``gateway-<p>.tmp``) IS picked up: once it
has a valid ``type``/``run``, s6-svscan spawns ``s6-supervise`` AS
ROOT, creating a root-owned ``supervise/`` which makes the in-flight
``_seed_supervise_skeleton`` EACCES on ``mkdir supervise/event``. That
is the arm64-only flake (the native-arm runner's wider scheduling
jitter lets the rescan land inside the seed window).
- A DOT-prefixed name (the fix, ``.gateway-<p>.tmp``) is SKIPPED by
s6-svscan and never supervised, so no root-owned ``supervise/`` can
appear under the staging dir.
This proves the mechanism directly and is arch-independent (it does not
rely on hitting the narrow timing window it forces the rescan and
checks pickup), so it guards the fix on the amd64 job too.
"""
start_container(built_image, container_name, cmd="sleep 120")
# Control: a NON-dotted service-shaped dir IS supervised by the rescan
# (root-owned supervise/). This is the pre-fix staging-name behaviour and
# confirms the probe actually exercises s6-svscan pickup.
r = docker_exec(
container_name, "sh", "-c", _SVSCAN_PICKUP_PROBE, "probe",
"gateway-raceprobe.tmp", user="root", timeout=30,
)
assert "SUPERVISED" in r.stdout and "NOT-SUPERVISED" not in r.stdout, (
"control failed: a non-dotted staging dir should be picked up by "
f"s6-svscan. stdout={r.stdout!r} stderr={r.stderr!r}"
)
# The fix: a DOT-prefixed staging dir (the name register/reconcile now
# use) must be IGNORED by the same rescan — no supervisor, no root-owned
# supervise/, so the in-flight seed can never EACCES.
r = docker_exec(
container_name, "sh", "-c", _SVSCAN_PICKUP_PROBE, "probe",
".gateway-raceprobe.tmp", user="root", timeout=30,
)
assert "NOT-SUPERVISED" in r.stdout, (
"dot-prefixed staging dir was supervised by s6-svscan — the race "
f"that EACCESes the seed is still reachable. stdout={r.stdout!r} "
f"stderr={r.stderr!r}"
)

View file

@ -489,19 +489,26 @@ def test_register_service_overwrites_existing_slot(tmp_path: Path) -> None:
hermes_home=tmp_path, scandir=scandir, dry_run=False,
)
# Slot still exists, no .tmp remnants.
# Slot still exists, no .tmp remnants (staging dir is dot-prefixed,
# so match it explicitly — a leading-`*` glob won't catch dotfiles).
assert (scandir / "gateway-coder" / "run").read_text() == first_run
assert list(scandir.glob("*.tmp")) == []
assert list(scandir.glob(".*.tmp")) == []
# Down marker now present (state went from running → stopped).
assert (scandir / "gateway-coder" / "down").exists()
def test_register_service_cleans_up_stale_tmp_dir(tmp_path: Path) -> None:
"""If a previous interrupted run left a .tmp sibling directory,
a fresh reconcile must clean it up rather than failing on mkdir."""
"""If a previous interrupted run left a staging sibling directory,
a fresh reconcile must clean it up rather than failing on mkdir.
The staging dir is dot-prefixed (``.gateway-<profile>.tmp``) so a
concurrent s6-svscan rescan can't supervise it half-built; the
cleanup must target that same dot-prefixed name.
"""
scandir = tmp_path / "run-service"; scandir.mkdir()
# Simulate a leftover from an interrupted run.
stale_tmp = scandir / "gateway-coder.tmp"
# Simulate a leftover from an interrupted run (current staging name).
stale_tmp = scandir / ".gateway-coder.tmp"
stale_tmp.mkdir()
(stale_tmp / "stale-file").write_text("garbage")

View file

@ -586,6 +586,52 @@ def test_s6_register_creates_service_dir_and_triggers_scan(
), f"s6-svscanctl -a not invoked; saw: {fake_subprocess_run}"
def test_s6_register_staging_dir_is_dotfile_hidden_from_svscan(
s6_scandir, fake_subprocess_run, monkeypatch: pytest.MonkeyPatch,
) -> None:
"""The mid-build staging dir MUST be dot-prefixed so s6-svscan
ignores it while it is half-populated.
s6-svscan skips any scandir entry whose name begins with ``.``. If
the staging dir were a plain ``gateway-<p>.tmp`` (a non-dotfile),
a concurrent ``s6-svscanctl -a`` rescan would supervise it the
moment it has a valid ``type``/``run`` spawning s6-supervise AS
ROOT, which mkdir's a root-owned ``supervise/`` and makes the
in-flight ``_seed_supervise_skeleton`` EACCES on
``mkdir supervise/event``. That was the arm64-only CI flake on
``test_s6_unregister_removes_service_dir_in_live_container``.
We capture the directory passed to ``_seed_supervise_skeleton``
(called mid-build, BEFORE the atomic rename to the live name) and
assert its basename starts with ``.`` and still lives in the
scandir as a sibling of the live slot.
"""
import hermes_cli.service_manager as sm
seen: list[str] = []
real_seed = sm._seed_supervise_skeleton
def _capturing_seed(svc_dir, *a, **kw):
seen.append(str(svc_dir))
return real_seed(svc_dir, *a, **kw)
monkeypatch.setattr(sm, "_seed_supervise_skeleton", _capturing_seed)
S6ServiceManager(scandir=s6_scandir).register_profile_gateway("coder")
assert seen, "_seed_supervise_skeleton was never called during register"
staging = seen[0]
staging_name = staging.rsplit("/", 1)[-1]
assert staging_name.startswith("."), (
f"staging dir must be a dotfile so s6-svscan skips it mid-build; "
f"got {staging_name!r}"
)
# Sibling of the live slot, in the same scandir.
assert staging == str(s6_scandir / ".gateway-coder.tmp")
# And the published (renamed) live slot is the dotless canonical name.
assert (s6_scandir / "gateway-coder").is_dir()
def test_s6_register_start_now_false_writes_down_marker(
s6_scandir, fake_subprocess_run,
) -> None: