From 1e70df5fdd8fb472ede6233ceb3890337f4e346c Mon Sep 17 00:00:00 2001 From: Ben Barclay Date: Thu, 18 Jun 2026 16:12:06 +1000 Subject: [PATCH] =?UTF-8?q?feat(gateway):=20multiplex=20phase=204=20?= =?UTF-8?q?=E2=80=94=20lifecycle=20guard=20+=20per-profile=20observability?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - _guard_named_profile_under_multiplexer: when the default gateway is running with gateway.multiplex_profiles=on, a named-profile 'hermes gateway run' hard-errors (pointing at the multiplexer) instead of double-binding that profile's platforms. Inert unless all hold: this invocation is a named profile, a default-profile gateway is alive, and its config has multiplexing on. --force overrides. Wired into run_gateway's guard chain. - write_runtime_status gains served_profiles: the secondary-adapter startup records [active] + multiplexed profiles into runtime_status.json so 'hermes status' can show per-profile coverage without a second probe. Absent for single-profile gateways. Tests: served_profiles round-trips and is absent by default; guard is inert for the default profile / under --force / when no default gateway is running. --- gateway/run.py | 9 +++ gateway/status.py | 6 ++ hermes_cli/gateway.py | 81 +++++++++++++++++++++++ tests/gateway/test_multiplex_lifecycle.py | 55 +++++++++++++++ 4 files changed, 151 insertions(+) create mode 100644 tests/gateway/test_multiplex_lifecycle.py diff --git a/gateway/run.py b/gateway/run.py index 2d42dfd2d9f..51857ea68a0 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -6735,6 +6735,15 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew "Failed to start adapters for profile '%s': %s", profile_name, e, exc_info=True, ) + + # Record served profiles in runtime status for `hermes status`. 
+ try: + from gateway.status import write_runtime_status + served = [active] + sorted(self._profile_adapters.keys()) + write_runtime_status(served_profiles=served) + except Exception: + logger.debug("could not record served_profiles", exc_info=True) + return connected async def _start_one_profile_adapters( diff --git a/gateway/status.py b/gateway/status.py index 2b4bd08ba39..b4bee42fdad 100644 --- a/gateway/status.py +++ b/gateway/status.py @@ -575,6 +575,7 @@ def write_runtime_status( platform_state: Any = _UNSET, error_code: Any = _UNSET, error_message: Any = _UNSET, + served_profiles: Any = _UNSET, ) -> None: """Persist gateway runtime health information for diagnostics/status.""" path = _get_runtime_status_path() @@ -595,6 +596,11 @@ def write_runtime_status( payload["restart_requested"] = bool(restart_requested) if active_agents is not _UNSET: payload["active_agents"] = max(0, int(active_agents)) + if served_profiles is not _UNSET: + # Profiles this gateway multiplexes (multi-profile mode). Absent/empty + # for a single-profile gateway. Lets `hermes status` show per-profile + # coverage without a second probe. + payload["served_profiles"] = list(served_profiles or []) if platform is not _UNSET: platform_payload = payload["platforms"].get(platform, {}) diff --git a/hermes_cli/gateway.py b/hermes_cli/gateway.py index 06f9c49b916..f1dddd087f4 100644 --- a/hermes_cli/gateway.py +++ b/hermes_cli/gateway.py @@ -3851,6 +3851,86 @@ def _running_under_gateway_supervisor() -> bool: return False +def _guard_named_profile_under_multiplexer(force: bool = False) -> None: + """Refuse a named-profile gateway when a multiplexer is already serving it. + + When the default profile's gateway runs with gateway.multiplex_profiles=on, + it is the sole inbound process for EVERY profile on the host. Starting a + separate gateway for a named profile would double-bind that profile's + platforms (two pollers on one bot token, port fights). 
In that mode a + named-profile ``hermes gateway run`` is always a misconfiguration, so we + hard-error with a pointer to the multiplexer. ``--force`` overrides. + + Inert unless ALL of: (a) this invocation is a named profile, (b) a default- + profile gateway is running, (c) that gateway's config has multiplexing on. + """ + if force: + return + # (a) Are we a named profile? Default/custom-hash homes return "". + try: + suffix = _profile_suffix() + except Exception: + return + if not suffix: + return # default profile (or unrecognized) — this guard doesn't apply + + try: + from hermes_constants import get_default_hermes_root + default_root = get_default_hermes_root() + # (b) Is the default-profile gateway running? + from gateway.status import get_running_pid as _default_running_pid # noqa + except Exception: + return + + try: + import yaml as _yaml + from gateway.status import _read_pid_record # type: ignore + + # (b) default gateway PID file present + alive + default_pid_path = default_root / "gateway.pid" + rec = _read_pid_record(default_pid_path) + if not rec: + return + from gateway.status import _pid_exists, _pid_from_record + pid = _pid_from_record(rec) + if not pid or not _pid_exists(pid): + return + + # (c) default config has multiplexing on + cfg_path = default_root / "config.yaml" + if not cfg_path.exists(): + return + with open(cfg_path, encoding="utf-8") as f: + cfg = _yaml.safe_load(f) or {} + multiplex = bool( + cfg.get("multiplex_profiles") + or (cfg.get("gateway", {}) or {}).get("multiplex_profiles") + ) + if not multiplex: + return + except Exception: + logger.debug("Multiplexer-conflict probe failed", exc_info=True) + return + + print_error( + f"The default gateway is running as a profile multiplexer and already " + f"serves profile '{suffix}'." + ) + print( + " When gateway.multiplex_profiles is on, the default gateway is the\n" + " single inbound process for every profile. 
Starting a separate\n" + " gateway for this profile would double-bind its platforms (two\n" + " pollers on one bot token, port conflicts).\n" + ) + print(" Manage the multiplexer instead (from the default profile):") + print() + print(" hermes gateway restart") + print() + print(" Pass --force to start a separate profile gateway anyway (not") + print(" recommended while the multiplexer is running).") + sys.exit(1) + + def _guard_supervised_gateway_conflict(force: bool = False) -> None: """Refuse a foreground gateway when a service manager already supervises one. @@ -3963,6 +4043,7 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False, fo systemd/launchd service is already supervising this profile. """ _guard_official_docker_root_gateway() + _guard_named_profile_under_multiplexer(force=force) _guard_supervised_gateway_conflict(force=force) _guard_existing_gateway_process_conflict(replace=replace) sys.path.insert(0, str(PROJECT_ROOT)) diff --git a/tests/gateway/test_multiplex_lifecycle.py b/tests/gateway/test_multiplex_lifecycle.py new file mode 100644 index 00000000000..6b5da5d9c38 --- /dev/null +++ b/tests/gateway/test_multiplex_lifecycle.py @@ -0,0 +1,55 @@ +"""Phase 4: lifecycle guard + per-profile observability.""" +import pytest + + +class TestServedProfilesStatus: + def test_write_and_read_served_profiles(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import gateway.status as status + importlib.reload(status) + try: + status.write_runtime_status( + gateway_state="running", served_profiles=["default", "coder"] + ) + rec = status.read_runtime_status() + assert rec.get("served_profiles") == ["default", "coder"] + finally: + importlib.reload(status) + + def test_served_profiles_absent_by_default(self, tmp_path, monkeypatch): + monkeypatch.setenv("HERMES_HOME", str(tmp_path)) + import importlib + import gateway.status as status + importlib.reload(status) + try: + 
status.write_runtime_status(gateway_state="running") + rec = status.read_runtime_status() + assert "served_profiles" not in rec + finally: + importlib.reload(status) + + +class TestNamedProfileMultiplexerGuard: + """_guard_named_profile_under_multiplexer is inert unless all conditions hold.""" + + def test_inert_for_default_profile(self, monkeypatch): + from hermes_cli import gateway as gw + monkeypatch.setattr(gw, "_profile_suffix", lambda: "") + # Should return without raising (default profile => guard N/A). + gw._guard_named_profile_under_multiplexer(force=False) + + def test_force_bypasses(self, monkeypatch): + from hermes_cli import gateway as gw + # Even if it looks like a named profile, force returns immediately. + monkeypatch.setattr(gw, "_profile_suffix", lambda: "coder") + gw._guard_named_profile_under_multiplexer(force=True) + + def test_inert_when_no_default_gateway_running(self, monkeypatch, tmp_path): + from hermes_cli import gateway as gw + monkeypatch.setattr(gw, "_profile_suffix", lambda: "coder") + monkeypatch.setattr( + "hermes_constants.get_default_hermes_root", lambda: tmp_path + ) + # No gateway.pid in tmp_path => no running default gateway => no raise. + gw._guard_named_profile_under_multiplexer(force=False)