feat(gateway): multiplex phase 4 — lifecycle guard + per-profile observability

- _guard_named_profile_under_multiplexer: when the default gateway is running with gateway.multiplex_profiles=on, a named-profile 'hermes gateway run' hard-errors (pointing at the multiplexer) instead of double-binding that profile's platforms. Inert unless all hold: this invocation is a named profile, a default-profile gateway is alive, and its config has multiplexing on. --force overrides. Wired into run_gateway's guard chain.
- write_runtime_status gains served_profiles: the secondary-adapter startup records [active] + multiplexed profiles into runtime_status.json so 'hermes status' can show per-profile coverage without a second probe. Absent for single-profile gateways.

Tests: served_profiles round-trips and is absent by default; guard is inert for the default profile / under --force / when no default gateway is running.
2026-06-21 10:22:18 +00:00 · 2026-06-18 16:12:06 +10:00 · 2026-06-18 16:12:06 +10:00 · 1e70df5fdd
commit 1e70df5fdd
parent d5d02eabb0
4 changed files with 151 additions and 0 deletions
--- a/gateway/run.py
+++ b/gateway/run.py
@ -6735,6 +6735,15 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
                    "Failed to start adapters for profile '%s': %s",
                    profile_name, e, exc_info=True,
                )
+
+        # Record served profiles in runtime status for `hermes status`.
+        try:
+            from gateway.status import write_runtime_status
+            served = [active] + sorted(self._profile_adapters.keys())
+            write_runtime_status(served_profiles=served)
+        except Exception:
+            logger.debug("could not record served_profiles", exc_info=True)
+
        return connected

    async def _start_one_profile_adapters(
--- a/gateway/status.py
+++ b/gateway/status.py
@ -575,6 +575,7 @@ def write_runtime_status(
    platform_state: Any = _UNSET,
    error_code: Any = _UNSET,
    error_message: Any = _UNSET,
+    served_profiles: Any = _UNSET,
 ) -> None:
    """Persist gateway runtime health information for diagnostics/status."""
    path = _get_runtime_status_path()
@ -595,6 +596,11 @@ def write_runtime_status(
        payload["restart_requested"] = bool(restart_requested)
    if active_agents is not _UNSET:
        payload["active_agents"] = max(0, int(active_agents))
+    if served_profiles is not _UNSET:
+        # Profiles this gateway multiplexes (multi-profile mode). Absent/empty
+        # for a single-profile gateway. Lets `hermes status` show per-profile
+        # coverage without a second probe.
+        payload["served_profiles"] = list(served_profiles or [])

    if platform is not _UNSET:
        platform_payload = payload["platforms"].get(platform, {})
--- a/hermes_cli/gateway.py
+++ b/hermes_cli/gateway.py
@ -3851,6 +3851,86 @@ def _running_under_gateway_supervisor() -> bool:
    return False


+def _guard_named_profile_under_multiplexer(force: bool = False) -> None:
+    """Refuse a named-profile gateway when a multiplexer is already serving it.
+
+    When the default profile's gateway runs with gateway.multiplex_profiles=on,
+    it is the sole inbound process for EVERY profile on the host. Starting a
+    separate gateway for a named profile would double-bind that profile's
+    platforms (two pollers on one bot token, port fights). In that mode a
+    named-profile ``hermes gateway run`` is always a misconfiguration, so we
+    hard-error with a pointer to the multiplexer.
+
+    Inert unless ALL of: (a) this invocation is a named profile, (b) a default-
+    profile gateway is running, (c) that gateway's config has multiplexing on.
+    Any exception raised while probing leaves the guard inert (fail open).
+
+    Args:
+        force: When true, skip the guard entirely (the ``--force`` override).
+
+    Raises:
+        SystemExit: With status 1 when all three conditions hold.
+    """
+    if force:
+        return
+    # (a) Are we a named profile? Default/custom-hash homes return "".
+    try:
+        suffix = _profile_suffix()
+    except Exception:
+        return
+    if not suffix:
+        return  # default profile (or unrecognized) — this guard doesn't apply
+
+    try:
+        import yaml as _yaml
+        from gateway.status import _pid_exists, _pid_from_record, _read_pid_record
+        from hermes_constants import get_default_hermes_root
+
+        default_root = get_default_hermes_root()
+
+        # (b) default gateway PID file present + alive
+        rec = _read_pid_record(default_root / "gateway.pid")
+        if not rec:
+            return
+        pid = _pid_from_record(rec)
+        if not pid or not _pid_exists(pid):
+            return
+
+        # (c) default config has multiplexing on
+        cfg_path = default_root / "config.yaml"
+        if not cfg_path.exists():
+            return
+        with open(cfg_path, encoding="utf-8") as f:
+            cfg = _yaml.safe_load(f) or {}
+        multiplex = bool(
+            cfg.get("multiplex_profiles")
+            or (cfg.get("gateway", {}) or {}).get("multiplex_profiles")
+        )
+        if not multiplex:
+            return
+    except Exception:
+        logger.debug("Multiplexer-conflict probe failed", exc_info=True)
+        return
+
+    print_error(
+        f"The default gateway is running as a profile multiplexer and already "
+        f"serves profile '{suffix}'."
+    )
+    print(
+        "  When gateway.multiplex_profiles is on, the default gateway is the\n"
+        "  single inbound process for every profile. Starting a separate\n"
+        "  gateway for this profile would double-bind its platforms (two\n"
+        "  pollers on one bot token, port conflicts).\n"
+    )
+    print("  Manage the multiplexer instead (from the default profile):")
+    print()
+    print("    hermes gateway restart")
+    print()
+    print("  Pass --force to start a separate profile gateway anyway (not")
+    print("  recommended while the multiplexer is running).")
+    sys.exit(1)
+
+
 def _guard_supervised_gateway_conflict(force: bool = False) -> None:
    """Refuse a foreground gateway when a service manager already supervises one.

@ -3963,6 +4043,7 @@ def run_gateway(verbose: int = 0, quiet: bool = False, replace: bool = False, fo
               systemd/launchd service is already supervising this profile.
    """
    _guard_official_docker_root_gateway()
+    _guard_named_profile_under_multiplexer(force=force)
    _guard_supervised_gateway_conflict(force=force)
    _guard_existing_gateway_process_conflict(replace=replace)
    sys.path.insert(0, str(PROJECT_ROOT))
--- a/tests/gateway/test_multiplex_lifecycle.py
+++ b/tests/gateway/test_multiplex_lifecycle.py
@ -0,0 +1,55 @@
+"""Phase 4: lifecycle guard + per-profile observability."""
+import pytest
+
+
+class TestServedProfilesStatus:
+    """``served_profiles`` persistence in the runtime status record."""
+
+    def test_write_and_read_served_profiles(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import importlib
+        import gateway.status as mod
+        want = ["default", "coder"]
+        # Re-execute the module now that HERMES_HOME points at tmp_path.
+        importlib.reload(mod)
+        try:
+            mod.write_runtime_status(gateway_state="running", served_profiles=want)
+            assert mod.read_runtime_status().get("served_profiles") == want
+        finally:
+            importlib.reload(mod)
+
+    def test_served_profiles_absent_by_default(self, tmp_path, monkeypatch):
+        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
+        import importlib
+        import gateway.status as mod
+        importlib.reload(mod)
+        try:
+            mod.write_runtime_status(gateway_state="running")
+            assert "served_profiles" not in mod.read_runtime_status()
+        finally:
+            importlib.reload(mod)
+
+
+class TestNamedProfileMultiplexerGuard:
+    """The multiplexer guard must return quietly when any precondition is unmet."""
+
+    def test_inert_for_default_profile(self, monkeypatch):
+        from hermes_cli import gateway as gateway_cli
+        # An empty suffix marks the default profile, so the guard is N/A.
+        monkeypatch.setattr(gateway_cli, "_profile_suffix", lambda: "")
+        gateway_cli._guard_named_profile_under_multiplexer(force=False)
+
+    def test_force_bypasses(self, monkeypatch):
+        from hermes_cli import gateway as gateway_cli
+        # force=True returns immediately, even for a named profile.
+        monkeypatch.setattr(gateway_cli, "_profile_suffix", lambda: "coder")
+        gateway_cli._guard_named_profile_under_multiplexer(force=True)
+
+    def test_inert_when_no_default_gateway_running(self, monkeypatch, tmp_path):
+        from hermes_cli import gateway as gateway_cli
+        # tmp_path has no gateway.pid, so no default gateway counts as running.
+        monkeypatch.setattr(
+            "hermes_constants.get_default_hermes_root", lambda: tmp_path
+        )
+        monkeypatch.setattr(gateway_cli, "_profile_suffix", lambda: "coder")
+        gateway_cli._guard_named_profile_under_multiplexer(force=False)