mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 01:31:41 +00:00
Merge 0344959d46 into a5129c72ef
This commit is contained in:
commit
3472ba4e2d
3 changed files with 392 additions and 14 deletions
|
|
@ -6,6 +6,7 @@ Handles: hermes gateway [run|start|stop|restart|status|install|uninstall|setup]
|
|||
|
||||
import asyncio
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import signal
|
||||
import subprocess
|
||||
|
|
@ -59,7 +60,69 @@ class GatewayRuntimeSnapshot:
|
|||
def has_process_service_mismatch(self) -> bool:
|
||||
return self.service_installed and self.running and not self.service_running
|
||||
|
||||
def _get_service_pids() -> set:
|
||||
# ``launchctl list <label>`` on macOS returns a plist-dict line that looks
|
||||
# like ``\t"PID" = 855;``. The only integer that must be on that line is
|
||||
# the PID, so a narrow regex is safer than string-splitting on quotes and
|
||||
# semicolons (which breaks when keys get new spacing or quoting in future
|
||||
# macOS releases). Anchored to ``"PID"`` so sibling keys like
|
||||
# ``LastExitStatus`` can't match.
|
||||
_LAUNCHD_PLIST_PID_RE = re.compile(r'"PID"\s*=\s*(\d+)\s*;')
|
||||
|
||||
|
||||
def _parse_launchd_list_output(stdout: str, label: str) -> set[int]:
|
||||
"""Extract PIDs for ``label`` from ``launchctl list`` output.
|
||||
|
||||
macOS ``launchctl list`` has two distinct output formats and which one
|
||||
you get depends on whether a label is passed:
|
||||
|
||||
* **No label argument** → tab-separated table ``PID\\tStatus\\tLabel``,
|
||||
one row per service, header line first.
|
||||
* **With label argument** → plist-dict dump of the single service,
|
||||
e.g. ``"PID" = 855;\\n"Label" = "ai.hermes.gateway";`` (#15225).
|
||||
|
||||
The original implementation only handled the first format, so when
|
||||
called with a label the data line parsed as a dict key/value and
|
||||
``parts[2]`` (``'"ai.hermes.gateway";'``) never equalled the bare
|
||||
label string — no PID ever got extracted. This helper accepts both
|
||||
formats and returns the extracted PIDs as a set. Filtering on
|
||||
``label`` (to avoid grabbing an unrelated service's PID from a
|
||||
no-arg dump) is done only when the tab-separated path is taken,
|
||||
because the plist-dict dump is already scoped to the label the
|
||||
caller requested.
|
||||
"""
|
||||
pids: set[int] = set()
|
||||
if not stdout:
|
||||
return pids
|
||||
|
||||
# Plist-dict format: ``"PID" = NNN;`` lines. ``findall`` returns all
|
||||
# matches as strings — we trust ``int()`` since the regex capture is
|
||||
# ``\d+``.
|
||||
for match in _LAUNCHD_PLIST_PID_RE.findall(stdout):
|
||||
try:
|
||||
pid = int(match)
|
||||
except ValueError:
|
||||
continue
|
||||
if pid > 0:
|
||||
pids.add(pid)
|
||||
if pids:
|
||||
return pids
|
||||
|
||||
# Tab-separated fallback (no-label dump). Header line starts with
|
||||
# ``PID\tStatus\tLabel`` so only rows whose third column matches our
|
||||
# label contribute.
|
||||
for line in stdout.strip().splitlines():
|
||||
parts = line.split()
|
||||
if len(parts) >= 3 and parts[2] == label:
|
||||
try:
|
||||
pid = int(parts[0])
|
||||
except ValueError:
|
||||
continue
|
||||
if pid > 0:
|
||||
pids.add(pid)
|
||||
return pids
|
||||
|
||||
|
||||
def _get_service_pids() -> set[int]:
|
||||
"""Return PIDs currently managed by systemd or launchd gateway services.
|
||||
|
||||
Used to avoid killing freshly-restarted service processes when sweeping
|
||||
|
|
@ -67,7 +130,7 @@ def _get_service_pids() -> set:
|
|||
service manager having committed the new PID before the restart command
|
||||
returns (true for both systemd and launchd in practice).
|
||||
"""
|
||||
pids: set = set()
|
||||
pids: set[int] = set()
|
||||
|
||||
# --- systemd (Linux): user and system scopes ---
|
||||
if supports_systemd_services():
|
||||
|
|
@ -106,16 +169,7 @@ def _get_service_pids() -> set:
|
|||
capture_output=True, text=True, timeout=5,
|
||||
)
|
||||
if result.returncode == 0:
|
||||
# Output: "PID\tStatus\tLabel" header, then one data line
|
||||
for line in result.stdout.strip().splitlines():
|
||||
parts = line.split()
|
||||
if len(parts) >= 3 and parts[2] == label:
|
||||
try:
|
||||
pid = int(parts[0])
|
||||
if pid > 0:
|
||||
pids.add(pid)
|
||||
except ValueError:
|
||||
pass
|
||||
pids |= _parse_launchd_list_output(result.stdout, label)
|
||||
except (FileNotFoundError, subprocess.TimeoutExpired):
|
||||
pass
|
||||
|
||||
|
|
@ -299,8 +353,16 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
|
|||
pass
|
||||
current_cmd = ""
|
||||
else:
|
||||
# ``-A -ww`` (listing every process, unlimited line width) is
|
||||
# portable across Linux/procps, BSD/Darwin, and busybox. The
|
||||
# old invocation passed ``eww`` as a positional argument — on
|
||||
# Darwin that's rejected as "illegal argument" (#15225) and on
|
||||
# FreeBSD the embedded ``e`` prepended environment variables
|
||||
# to the command column, which broke ``split(None, 1)`` there
|
||||
# (#9069). Dropping the ``e`` also stops leaking env vars
|
||||
# (API keys, tokens) into the command string we parse.
|
||||
result = subprocess.run(
|
||||
["ps", "-A", "eww", "-o", "pid=,command="],
|
||||
["ps", "-A", "-ww", "-o", "pid=,command="],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
|
|
|
|||
|
|
@ -261,7 +261,9 @@ def test_find_gateway_pids_falls_back_to_pid_file_when_process_scan_fails(monkey
|
|||
monkeypatch.setattr("gateway.status.get_running_pid", lambda: 321)
|
||||
|
||||
def fake_run(cmd, **kwargs):
|
||||
if cmd[:4] == ["ps", "-A", "eww", "-o"]:
|
||||
# ``-A -ww`` is the portable form since #15225 — earlier revisions
|
||||
# passed ``eww`` which Darwin rejects as illegal.
|
||||
if cmd[:4] == ["ps", "-A", "-ww", "-o"]:
|
||||
return SimpleNamespace(returncode=1, stdout="", stderr="ps failed")
|
||||
raise AssertionError(f"Unexpected command: {cmd}")
|
||||
|
||||
|
|
|
|||
314
tests/hermes_cli/test_gateway_pid_detection_macos.py
Normal file
314
tests/hermes_cli/test_gateway_pid_detection_macos.py
Normal file
|
|
@ -0,0 +1,314 @@
|
|||
"""Regression guard for #15225 — macOS gateway-pid detection.
|
||||
|
||||
Two independent bugs in ``hermes_cli/gateway.py`` caused
|
||||
``find_gateway_pids()`` to return an empty list on macOS, which made
|
||||
``hermes cron list`` print a false-positive "Gateway is not running"
|
||||
warning even when launchd had the service loaded and cron firing was
|
||||
working normally.
|
||||
|
||||
Bug 1 — ``_get_service_pids()`` called ``launchctl list <label>`` and
|
||||
then parsed the output as the tab-separated table ``PID\\tStatus\\tLabel``.
|
||||
With a label argument, macOS ``launchctl`` instead returns a plist-dict
|
||||
``{ "PID" = 855; "Label" = "ai.hermes.gateway"; ...}`` — the old loop's
|
||||
``parts[2] == label`` check never matched the quoted/semicolon'd token
|
||||
so no PID was ever extracted.
|
||||
|
||||
Bug 2 — ``_scan_gateway_pids()`` invoked ``ps -A eww -o pid=,command=``.
|
||||
Darwin's ``ps`` rejects ``eww`` as an illegal positional argument
|
||||
(``exit=1``, empty stdout), so the fallback PID scan also produced
|
||||
nothing.
|
||||
|
||||
These tests pin the fixes at the production-code entry points:
|
||||
|
||||
* ``_parse_launchd_list_output`` (the new helper) is exercised directly
|
||||
with the exact plist-dict output from the bug report AND with the
|
||||
tab-separated format the old code expected — both must surface the
|
||||
correct PID.
|
||||
* ``_get_service_pids`` is exercised end-to-end with ``subprocess.run``
|
||||
patched to return the plist-dict payload, proving the macOS branch
|
||||
no longer comes back empty for a running service.
|
||||
* ``_scan_gateway_pids`` (and ``find_gateway_pids`` through it) is
|
||||
exercised with a patched ``subprocess.run`` that records the exact
|
||||
command list the implementation passed — failing loudly if the
|
||||
invocation ever regresses to ``eww``.
|
||||
"""
|
||||
from types import SimpleNamespace
|
||||
|
||||
import pytest
|
||||
|
||||
from hermes_cli import gateway
|
||||
|
||||
|
||||
# Real-world plist-dict payload taken from the #15225 reproduction,
|
||||
# minus shell-specific paths (so the fixture is portable across contributors).
|
||||
_MACOS_LAUNCHCTL_LIST_LABEL_OUTPUT = """\
|
||||
{
|
||||
\t"StandardOutPath" = "/Users/someuser/.hermes/logs/gateway.log";
|
||||
\t"LimitLoadToSessionType" = "Aqua";
|
||||
\t"StandardErrorPath" = "/Users/someuser/.hermes/logs/gateway.error.log";
|
||||
\t"Label" = "ai.hermes.gateway";
|
||||
\t"OnDemand" = true;
|
||||
\t"LastExitStatus" = 0;
|
||||
\t"PID" = 855;
|
||||
\t"Program" = "/Users/someuser/.hermes/hermes-agent/venv/bin/python";
|
||||
\t"ProgramArguments" = (
|
||||
\t\t"/Users/someuser/.hermes/hermes-agent/venv/bin/python";
|
||||
\t\t"-m";
|
||||
\t\t"hermes_cli";
|
||||
\t\t"gateway";
|
||||
\t\t"run";
|
||||
\t);
|
||||
};
|
||||
"""
|
||||
|
||||
|
||||
# Tab-separated format — what ``launchctl list`` (no label) produces. The
|
||||
# old code path assumed this format regardless of whether a label was
|
||||
# passed; the helper still has to handle it so callers that change their
|
||||
# mind about whether to pass a label don't break.
|
||||
_MACOS_LAUNCHCTL_LIST_TABLE_OUTPUT = """\
|
||||
PID\tStatus\tLabel
|
||||
855\t0\tai.hermes.gateway
|
||||
-\t0\tcom.apple.somethingelse
|
||||
1024\t0\tcom.other.service
|
||||
"""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 1 — _parse_launchd_list_output (new helper)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestParseLaunchdListOutput:
|
||||
def test_extracts_pid_from_plist_dict_format(self):
|
||||
"""macOS returns a plist-dict when a label is passed to
|
||||
``launchctl list``. This is the exact format from the #15225
|
||||
reproduction. The old regex-less string.split()-based code saw
|
||||
``parts[2] == '"ai.hermes.gateway";'`` (quoted + semicolon'd)
|
||||
and never matched the bare label — so it returned the empty
|
||||
set. The new helper parses ``"PID" = N;`` directly."""
|
||||
pids = gateway._parse_launchd_list_output(
|
||||
_MACOS_LAUNCHCTL_LIST_LABEL_OUTPUT, "ai.hermes.gateway",
|
||||
)
|
||||
assert pids == {855}
|
||||
|
||||
def test_extracts_pid_from_tab_separated_format(self):
|
||||
"""When the caller runs ``launchctl list`` without a label, the
|
||||
output is a tab-separated table. The helper must still extract
|
||||
only PIDs for the requested label (the table may list many
|
||||
services)."""
|
||||
pids = gateway._parse_launchd_list_output(
|
||||
_MACOS_LAUNCHCTL_LIST_TABLE_OUTPUT, "ai.hermes.gateway",
|
||||
)
|
||||
assert pids == {855}
|
||||
|
||||
def test_skips_tab_separated_rows_with_dash_pid(self):
|
||||
"""In the tab-separated format, an unloaded service appears as
|
||||
``-\\t0\\tlabel``. ``int("-")`` would raise; the helper must
|
||||
skip such rows cleanly and return an empty set if the requested
|
||||
label is only present as a dash row."""
|
||||
pids = gateway._parse_launchd_list_output(
|
||||
"PID\tStatus\tLabel\n-\t0\tai.hermes.gateway\n",
|
||||
"ai.hermes.gateway",
|
||||
)
|
||||
assert pids == set()
|
||||
|
||||
def test_empty_output_returns_empty_set(self):
|
||||
"""A 0-length stdout (e.g. launchctl hanging, or a non-zero exit
|
||||
where the caller already filtered) must never crash the helper
|
||||
or synthesise fake PIDs."""
|
||||
assert gateway._parse_launchd_list_output("", "ai.hermes.gateway") == set()
|
||||
|
||||
def test_plist_dict_wins_over_tab_separated_when_both_look_plausible(self):
|
||||
"""Defensive: if some future macOS release returned *both*
|
||||
shapes concatenated, trust the plist-dict (the label-scoped
|
||||
answer) rather than the table (which might list sibling
|
||||
services for the same user). The helper picks the plist
|
||||
matches first and only falls back to the tab-separated path
|
||||
when none were found."""
|
||||
merged = (
|
||||
_MACOS_LAUNCHCTL_LIST_LABEL_OUTPUT
|
||||
+ "\nPID\tStatus\tLabel\n9999\t0\tai.hermes.gateway\n"
|
||||
)
|
||||
pids = gateway._parse_launchd_list_output(merged, "ai.hermes.gateway")
|
||||
assert pids == {855} # from the plist-dict block, not the table
|
||||
|
||||
def test_pid_zero_is_rejected(self):
|
||||
"""``"PID" = 0;`` would be either kernel or "service not running"
|
||||
depending on the OS — either way, never a real gateway PID.
|
||||
Accepting it would crash downstream ``os.kill`` calls that
|
||||
interpret 0 as 'this process group'."""
|
||||
pids = gateway._parse_launchd_list_output(
|
||||
'{\n\t"PID" = 0;\n\t"Label" = "ai.hermes.gateway";\n};\n',
|
||||
"ai.hermes.gateway",
|
||||
)
|
||||
assert pids == set()
|
||||
|
||||
def test_other_label_entry_in_table_ignored(self):
|
||||
"""The tab-separated path must filter on the third column
|
||||
(label) — a different service at the top of the table must not
|
||||
contribute its PID to our result."""
|
||||
text = (
|
||||
"PID\tStatus\tLabel\n"
|
||||
"1234\t0\tcom.apple.unrelated\n"
|
||||
"855\t0\tai.hermes.gateway\n"
|
||||
)
|
||||
pids = gateway._parse_launchd_list_output(text, "ai.hermes.gateway")
|
||||
assert pids == {855}
|
||||
|
||||
@pytest.mark.parametrize("spacing", [
|
||||
'"PID" = 855;', # canonical
|
||||
'"PID"=855;', # no spaces
|
||||
'"PID" = 855 ;', # extra spaces
|
||||
'\t\t"PID" = 855;', # leading tabs (real output)
|
||||
])
|
||||
def test_pid_regex_tolerates_whitespace_variants(self, spacing):
|
||||
"""launchd's plist dumper is not a stable format — prior Apple
|
||||
releases have shifted spacing. The regex must not hinge on an
|
||||
exact ``"PID" = N;`` with a single space or we'll regress
|
||||
silently on a future macOS update."""
|
||||
text = f'{{\n\t{spacing}\n\t"Label" = "ai.hermes.gateway";\n}};\n'
|
||||
pids = gateway._parse_launchd_list_output(text, "ai.hermes.gateway")
|
||||
assert pids == {855}
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 1 — _get_service_pids wiring (macOS branch)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestGetServicePidsMacOS:
|
||||
"""End-to-end check that the macOS branch of ``_get_service_pids``
|
||||
actually picks up the PID for a running gateway now — the symptom
|
||||
#15225 complained about."""
|
||||
|
||||
def _patch_macos(self, monkeypatch, launchctl_stdout, launchctl_rc=0):
|
||||
monkeypatch.setattr(gateway, "is_macos", lambda: True)
|
||||
# Avoid running systemd probes on a macOS-emulation test —
|
||||
# ``supports_systemd_services`` already returns False on Darwin
|
||||
# but tests don't run on Darwin by default.
|
||||
monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
|
||||
monkeypatch.setattr(gateway, "get_launchd_label", lambda: "ai.hermes.gateway")
|
||||
|
||||
def fake_run(cmd, **kwargs):
|
||||
if cmd[:3] == ["launchctl", "list", "ai.hermes.gateway"]:
|
||||
return SimpleNamespace(
|
||||
returncode=launchctl_rc,
|
||||
stdout=launchctl_stdout,
|
||||
stderr="",
|
||||
)
|
||||
raise AssertionError(f"Unexpected subprocess call: {cmd!r}")
|
||||
|
||||
monkeypatch.setattr(gateway.subprocess, "run", fake_run)
|
||||
|
||||
def test_returns_pid_for_running_service(self, monkeypatch):
|
||||
self._patch_macos(monkeypatch, _MACOS_LAUNCHCTL_LIST_LABEL_OUTPUT)
|
||||
assert gateway._get_service_pids() == {855}
|
||||
|
||||
def test_returns_empty_set_when_launchctl_exits_nonzero(self, monkeypatch):
|
||||
"""The caller should not record PIDs from a failed command —
|
||||
e.g. launchctl returning "Could not find service" as rc=3."""
|
||||
self._patch_macos(monkeypatch, "", launchctl_rc=3)
|
||||
assert gateway._get_service_pids() == set()
|
||||
|
||||
def test_survives_launchctl_missing_on_path(self, monkeypatch):
|
||||
"""If ``launchctl`` isn't installed (hermes CLI running on a
|
||||
nonstandard macOS image), we must not propagate the
|
||||
``FileNotFoundError`` — return an empty set and let the ps
|
||||
fallback try next."""
|
||||
monkeypatch.setattr(gateway, "is_macos", lambda: True)
|
||||
monkeypatch.setattr(gateway, "supports_systemd_services", lambda: False)
|
||||
monkeypatch.setattr(gateway, "get_launchd_label", lambda: "ai.hermes.gateway")
|
||||
|
||||
def boom(cmd, **kwargs):
|
||||
raise FileNotFoundError("launchctl")
|
||||
|
||||
monkeypatch.setattr(gateway.subprocess, "run", boom)
|
||||
assert gateway._get_service_pids() == set()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Bug 2 — _scan_gateway_pids ps invocation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestPsInvocationPortability:
|
||||
"""The ``ps`` fallback invocation must be portable across Linux,
|
||||
Darwin, and FreeBSD. ``eww`` was illegal on Darwin (#15225) and
|
||||
leaked env vars (including API keys) on FreeBSD (#9069) — the
|
||||
portable replacement is ``-A -ww`` (list all processes, wide
|
||||
wide) with no ``e``.
|
||||
"""
|
||||
|
||||
def _invoke(self, monkeypatch, *, returncode=0, stdout=""):
|
||||
recorded = {"cmd": None}
|
||||
|
||||
def fake_run(cmd, **kwargs):
|
||||
recorded["cmd"] = list(cmd)
|
||||
return SimpleNamespace(returncode=returncode, stdout=stdout, stderr="")
|
||||
|
||||
monkeypatch.setattr(gateway, "is_windows", lambda: False)
|
||||
monkeypatch.setattr(gateway.subprocess, "run", fake_run)
|
||||
|
||||
# Exercise the production path. ``_scan_gateway_pids`` takes an
|
||||
# exclude-set; we pass empty so nothing is filtered out.
|
||||
pids = gateway._scan_gateway_pids(set(), all_profiles=True)
|
||||
return recorded["cmd"], pids
|
||||
|
||||
def test_never_passes_eww_positional(self, monkeypatch):
|
||||
"""The central regression guard: neither ``"eww"`` nor any
|
||||
``e``-containing flag stack that would attach env vars should
|
||||
end up on the command line. Parametrise in case a future
|
||||
refactor adds an equivalent flag."""
|
||||
cmd, _ = self._invoke(monkeypatch)
|
||||
assert cmd is not None, "production code never invoked ps"
|
||||
assert "eww" not in cmd, (
|
||||
f"ps was invoked with 'eww' (illegal on Darwin, leaks env on "
|
||||
f"FreeBSD). Full cmd: {cmd!r}"
|
||||
)
|
||||
# ``-A -ww`` is the expected replacement — each flag should appear.
|
||||
assert "-A" in cmd
|
||||
assert "-ww" in cmd
|
||||
|
||||
def test_invocation_shape_matches_expected(self, monkeypatch):
|
||||
"""Pin the exact invocation shape so a refactor doesn't
|
||||
accidentally alter argv ordering. ``pid=,command=`` is important
|
||||
— the trailing ``=`` on each field suppresses the header line
|
||||
that would otherwise confuse the parser."""
|
||||
cmd, _ = self._invoke(monkeypatch)
|
||||
assert cmd == ["ps", "-A", "-ww", "-o", "pid=,command="], (
|
||||
f"ps invocation drifted: {cmd!r}"
|
||||
)
|
||||
|
||||
def test_parses_macos_style_output(self, monkeypatch):
|
||||
"""Verify the parse loop still works for realistic Darwin ps
|
||||
output — ``pid=,command=`` suppresses the header and lines are
|
||||
space-separated PID + command text. Command string must contain
|
||||
one of the gateway-scan patterns (e.g. ``hermes_cli.main gateway``)
|
||||
to be picked up — same as the production matcher."""
|
||||
sample = (
|
||||
" 855 /Users/someuser/.hermes/hermes-agent/venv/bin/python -m hermes_cli.main gateway run\n"
|
||||
" 1024 /usr/sbin/cupsd -l\n"
|
||||
)
|
||||
monkeypatch.setattr(gateway, "is_windows", lambda: False)
|
||||
|
||||
def fake_run(cmd, **kwargs):
|
||||
return SimpleNamespace(returncode=0, stdout=sample, stderr="")
|
||||
|
||||
monkeypatch.setattr(gateway.subprocess, "run", fake_run)
|
||||
pids = gateway._scan_gateway_pids(set(), all_profiles=True)
|
||||
assert 855 in pids, (
|
||||
f"expected PID 855 extracted from ps output, got {pids}"
|
||||
)
|
||||
assert 1024 not in pids, (
|
||||
"non-gateway process (cupsd) must not be matched"
|
||||
)
|
||||
|
||||
def test_returncode_nonzero_returns_empty_list(self, monkeypatch):
|
||||
"""Darwin used to return exit=1 for the old ``eww`` invocation.
|
||||
Even with the fix, a future platform that errors on ``-ww``
|
||||
must not crash — the caller returns []."""
|
||||
cmd, pids = self._invoke(monkeypatch, returncode=1, stdout="")
|
||||
assert pids == []
|
||||
# Still invoked with the expected shape, just failed.
|
||||
assert cmd == ["ps", "-A", "-ww", "-o", "pid=,command="]
|
||||
Loading…
Add table
Add a link
Reference in a new issue