fix(terminal): block gateway lifecycle commands from inside the gateway process

systemctl --user restart hermes-gateway run via the terminal tool is a
child of the gateway itself. When systemd delivers SIGTERM the gateway
kills this subprocess before it can complete, so the service may never
restart — reproducing issue #37453.

The hermes gateway restart/stop guard (hermes_cli/gateway.py) and the
cron-path guard (hermes_cli/cron.py) already block equivalent commands
in their respective paths but the terminal tool had no such defense.

Add a hard-block before command execution in terminal_tool: when
_HERMES_GATEWAY=1 and the command matches _contains_gateway_lifecycle_command,
return an error immediately. force=True cannot bypass it — unlike the
normal dangerous-command approval flow, here even a user-approved restart
would fail because the SIGTERM propagates to child processes.

Also extend _GATEWAY_LIFECYCLE_PATTERNS to match systemctl with flags
(e.g. systemctl --user restart) — the previous regex required the
action word immediately after systemctl with no flags in between.

Adds 9 regression tests: 6 blocked variants (parametrized), force bypass
attempt, safe systemctl passthrough, and guard-inactive-outside-gateway.
This commit is contained in:
AhmetArif0 2026-06-02 18:34:26 +03:00 committed by kshitij
parent c02192ff6a
commit 245b95b094
3 changed files with 131 additions and 1 deletions

View file

@ -25,7 +25,7 @@ _GATEWAY_LIFECYCLE_PATTERNS = re.compile(
r"(?i)"
r"(hermes\s+gateway\s+(restart|stop|start))"
r"|(launchctl\s+(kickstart|unload|load|stop|restart)\s+.*hermes)"
r"|(systemctl\s+(restart|stop|start)\s+.*hermes)"
r"|(systemctl\s+(-\S+\s+)*(restart|stop|start)\s+.*hermes)"
r"|(p?kill\s+.*hermes.*gateway)"
)

View file

@ -6,6 +6,7 @@ Covers:
- _contains_gateway_lifecycle_command pattern matching
"""
import json
import os
from argparse import Namespace
@ -250,3 +251,109 @@ class TestGatewaySelfTargetingGuard:
args = Namespace(gateway_command="restart", all=False, system=False)
with pytest.raises(_Reached):
gw.gateway_command(args)
# ---------------------------------------------------------------------------
# Defense 3: terminal_tool hard-blocks gateway lifecycle commands inside gateway
# ---------------------------------------------------------------------------
class TestTerminalToolGatewayLifecycleGuard:
"""terminal_tool must refuse gateway lifecycle commands when _HERMES_GATEWAY=1.
Issue #37453: systemctl --user restart hermes-gateway runs as a child of the
gateway process. When systemd delivers SIGTERM the gateway kills its own
restart command mid-execution the service may never restart. The guard
must fire before execution, unconditionally (force=True cannot bypass it).
"""
def _make_fake_env(self):
class _FakeEnv:
env = {}
def execute(self, command, **kwargs): # pragma: no cover
raise AssertionError("execute must not be reached")
return _FakeEnv()
def _minimal_config(self):
return {"env_type": "local", "cwd": "/tmp", "timeout": 60, "lifetime_seconds": 3600}
def _patch_env(self, monkeypatch, fake_env, *, inside_gateway: bool):
import tools.terminal_tool as tt
eid = "default"
monkeypatch.setattr(tt, "_active_environments", {eid: fake_env})
monkeypatch.setattr(tt, "_last_activity", {eid: 0.0})
monkeypatch.setattr(tt, "_task_env_overrides", {})
monkeypatch.setattr(tt, "_get_env_config", self._minimal_config)
if inside_gateway:
monkeypatch.setenv("_HERMES_GATEWAY", "1")
else:
monkeypatch.delenv("_HERMES_GATEWAY", raising=False)
@pytest.mark.parametrize("cmd", [
"systemctl restart hermes-gateway",
"systemctl --user restart hermes-gateway",
"systemctl stop hermes-gateway.service",
"hermes gateway restart",
"launchctl kickstart gui/501/ai.hermes.gateway",
"pkill -f hermes.*gateway",
])
def test_blocks_lifecycle_commands_inside_gateway(self, monkeypatch, cmd):
import tools.terminal_tool as tt
self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)
result = json.loads(tt.terminal_tool(command=cmd))
assert result["exit_code"] == 1
assert "Blocked" in result["error"]
def test_force_true_cannot_bypass_block(self, monkeypatch):
import tools.terminal_tool as tt
self._patch_env(monkeypatch, self._make_fake_env(), inside_gateway=True)
result = json.loads(tt.terminal_tool(
command="systemctl restart hermes-gateway", force=True
))
assert result["exit_code"] == 1
assert "Blocked" in result["error"]
def test_safe_systemctl_commands_pass_through(self, monkeypatch):
"""Non-hermes systemctl commands must not be blocked by this guard."""
import tools.terminal_tool as tt
calls = []
class _FakeEnv:
env = {}
def execute(self, command, **kwargs):
calls.append(command)
return {"output": "Active: running", "returncode": 0}
self._patch_env(monkeypatch, _FakeEnv(), inside_gateway=True)
monkeypatch.setattr(tt, "_check_all_guards", lambda cmd, env: {"approved": True})
result = json.loads(tt.terminal_tool(command="systemctl status nginx"))
assert result["exit_code"] == 0
assert calls == ["systemctl status nginx"]
def test_guard_inactive_outside_gateway(self, monkeypatch):
"""Without _HERMES_GATEWAY=1 the lifecycle guard must not fire."""
import tools.terminal_tool as tt
calls = []
class _FakeEnv:
env = {}
def execute(self, command, **kwargs):
calls.append(command)
return {"output": "restarting...", "returncode": 0}
self._patch_env(monkeypatch, _FakeEnv(), inside_gateway=False)
monkeypatch.setattr(tt, "_check_all_guards", lambda cmd, env: {"approved": True})
result = json.loads(tt.terminal_tool(command="systemctl restart hermes-gateway"))
# Outside the gateway the lifecycle guard doesn't block — the normal
# approval flow handles it (here mocked as approved).
assert result["exit_code"] == 0
assert calls == ["systemctl restart hermes-gateway"]

View file

@ -2058,6 +2058,29 @@ def terminal_tool(
env = new_env
logger.info("%s environment ready for task %s", env_type, effective_task_id[:8])
# Hard-block: gateway lifecycle commands (systemctl/launchctl/hermes
# restart|stop targeting hermes-gateway) must never run inside the
# gateway process itself. The restart would SIGTERM the gateway, which
# kills this very subprocess before it can complete — the service may
# never restart. This mirrors the `hermes gateway restart` guard in
# hermes_cli/gateway.py and the cron-path guard in hermes_cli/cron.py,
# but applies unconditionally (force=True cannot help here).
if os.environ.get("_HERMES_GATEWAY") == "1":
from hermes_cli.cron import _contains_gateway_lifecycle_command
if _contains_gateway_lifecycle_command(command):
return json.dumps({
"output": "",
"exit_code": 1,
"error": (
"Blocked: cannot restart or stop the gateway from inside the "
"gateway process. The gateway would kill this command before "
"it could complete (SIGTERM propagates to child processes). "
"Run `hermes gateway restart` from a separate shell outside "
"the running gateway."
),
"status": "error",
}, ensure_ascii=False)
# Pre-exec security checks (tirith + dangerous command detection)
# Skip check if force=True (user has confirmed they want to run it)
approval_note = None