mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
fix(gateway): detect gateway process via /proc in Docker without procps
Salvage of NousResearch/hermes-agent#7622. Docker images often lack procps, so `ps` is unavailable. Try reading /proc/*/cmdline first (works in any Linux container) and fall back to `ps -A eww` only when /proc is not present or the /proc scan itself fails. An OSError (including PermissionError) on an individual PID is silently skipped. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
parent
2ffef15675
commit
6bf7ac3185
2 changed files with 196 additions and 32 deletions
|
|
@ -394,42 +394,68 @@ def _scan_gateway_pids(exclude_pids: set[int], all_profiles: bool = False) -> li
|
|||
pass
|
||||
current_cmd = ""
|
||||
else:
|
||||
result = subprocess.run(
|
||||
["ps", "-A", "eww", "-o", "pid=,command="],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return []
|
||||
for line in result.stdout.split("\n"):
|
||||
stripped = line.strip()
|
||||
if not stripped or "grep" in stripped:
|
||||
continue
|
||||
# Try /proc first (works in Docker without procps installed),
|
||||
# fall back to ps -A eww.
|
||||
_found_via_proc = False
|
||||
if os.path.isdir("/proc"):
|
||||
try:
|
||||
my_pid = os.getpid()
|
||||
for entry in os.listdir("/proc"):
|
||||
if not entry.isdigit():
|
||||
continue
|
||||
pid = int(entry)
|
||||
if pid == my_pid or pid in exclude_pids:
|
||||
continue
|
||||
try:
|
||||
cmdline = open(f"/proc/{pid}/cmdline", "rb").read().decode("utf-8", errors="replace")
|
||||
cmdline = cmdline.replace("\x00", " ")
|
||||
if any(p in cmdline for p in patterns) and (
|
||||
all_profiles or _matches_current_profile(cmdline)
|
||||
):
|
||||
_append_unique_pid(pids, pid, exclude_pids)
|
||||
except (OSError, PermissionError):
|
||||
continue
|
||||
_found_via_proc = True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
pid = None
|
||||
command = ""
|
||||
if not _found_via_proc:
|
||||
result = subprocess.run(
|
||||
["ps", "-A", "eww", "-o", "pid=,command="],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=10,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
return []
|
||||
for line in result.stdout.split("\n"):
|
||||
stripped = line.strip()
|
||||
if not stripped or "grep" in stripped:
|
||||
continue
|
||||
|
||||
parts = stripped.split(None, 1)
|
||||
if len(parts) == 2:
|
||||
try:
|
||||
pid = int(parts[0])
|
||||
command = parts[1]
|
||||
except ValueError:
|
||||
pid = None
|
||||
pid = None
|
||||
command = ""
|
||||
|
||||
if pid is None:
|
||||
aux_parts = stripped.split()
|
||||
if len(aux_parts) > 10 and aux_parts[1].isdigit():
|
||||
pid = int(aux_parts[1])
|
||||
command = " ".join(aux_parts[10:])
|
||||
parts = stripped.split(None, 1)
|
||||
if len(parts) == 2:
|
||||
try:
|
||||
pid = int(parts[0])
|
||||
command = parts[1]
|
||||
except ValueError:
|
||||
pid = None
|
||||
|
||||
if pid is None:
|
||||
continue
|
||||
if any(pattern in command for pattern in patterns) and (
|
||||
all_profiles or _matches_current_profile(command)
|
||||
):
|
||||
_append_unique_pid(pids, pid, exclude_pids)
|
||||
if pid is None:
|
||||
aux_parts = stripped.split()
|
||||
if len(aux_parts) > 10 and aux_parts[1].isdigit():
|
||||
pid = int(aux_parts[1])
|
||||
command = " ".join(aux_parts[10:])
|
||||
|
||||
if pid is None:
|
||||
continue
|
||||
if any(pattern in command for pattern in patterns) and (
|
||||
all_profiles or _matches_current_profile(command)
|
||||
):
|
||||
_append_unique_pid(pids, pid, exclude_pids)
|
||||
except (OSError, subprocess.TimeoutExpired):
|
||||
return []
|
||||
|
||||
|
|
|
|||
138
tests/hermes_cli/test_gateway_proc_fallback.py
Normal file
138
tests/hermes_cli/test_gateway_proc_fallback.py
Normal file
|
|
@ -0,0 +1,138 @@
|
|||
"""Tests for /proc-based gateway PID detection in Docker environments.
|
||||
|
||||
Verifies that _scan_gateway_pids() uses /proc/*/cmdline when available
|
||||
(Docker without procps) and falls back to ps only when /proc is absent.
|
||||
|
||||
See: NousResearch/hermes-agent#7622
|
||||
"""
|
||||
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import hermes_cli.gateway as gateway_mod
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
_GATEWAY_CMD = "python -m hermes_cli.main gateway run"
|
||||
_OTHER_CMD = "python -m some_other_thing"
|
||||
|
||||
|
||||
def _fake_proc_dir(entries: dict):
|
||||
"""Return side_effects that simulate /proc: isdir → True, listdir → pids,
|
||||
open(cmdline) → null-delimited command bytes."""
|
||||
def _isdir(path):
|
||||
return str(path) == "/proc"
|
||||
|
||||
def _listdir(path):
|
||||
if str(path) == "/proc":
|
||||
return [str(pid) for pid in entries] + ["self", "version"]
|
||||
raise FileNotFoundError(path)
|
||||
|
||||
def _open(path, mode="r", **kwargs):
|
||||
path_str = str(path)
|
||||
if "/cmdline" in path_str:
|
||||
pid = int(path_str.split("/proc/")[1].split("/")[0])
|
||||
raw = entries.get(pid, "").encode("utf-8").replace(b" ", b"\x00")
|
||||
m = MagicMock()
|
||||
m.read.return_value = raw
|
||||
m.__enter__ = lambda s: s
|
||||
m.__exit__ = MagicMock(return_value=False)
|
||||
return m
|
||||
raise FileNotFoundError(path)
|
||||
|
||||
return _isdir, _listdir, _open
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestProcFallback:
    """Exercise the /proc scan of _scan_gateway_pids and its ps fallback."""

    def test_detects_gateway_pid_via_proc(self):
        """A gateway cmdline in the fake /proc is reported and ps is not run."""
        own_pid = os.getpid()
        fake_isdir, fake_listdir, fake_open = _fake_proc_dir(
            {
                own_pid: "python -m hermes_cli.main",  # own process — excluded
                12345: _GATEWAY_CMD,
                99999: _OTHER_CMD,
            }
        )

        with (
            patch("hermes_cli.gateway.is_windows", return_value=False),
            patch("os.path.isdir", side_effect=fake_isdir),
            patch("os.listdir", side_effect=fake_listdir),
            patch("builtins.open", side_effect=fake_open),
            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
            patch("subprocess.run") as ps_call,
        ):
            found = gateway_mod._scan_gateway_pids(set(), all_profiles=True)

        assert 12345 in found
        assert 99999 not in found
        # ps must NOT be called when /proc worked
        ps_call.assert_not_called()

    def test_excludes_own_pid_from_proc_scan(self):
        """The current process is never reported, even with a gateway cmdline."""
        own_pid = os.getpid()
        fake_isdir, fake_listdir, fake_open = _fake_proc_dir({own_pid: _GATEWAY_CMD})

        with (
            patch("hermes_cli.gateway.is_windows", return_value=False),
            patch("os.path.isdir", side_effect=fake_isdir),
            patch("os.listdir", side_effect=fake_listdir),
            patch("builtins.open", side_effect=fake_open),
            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
            patch("subprocess.run"),
        ):
            found = gateway_mod._scan_gateway_pids(set(), all_profiles=True)

        assert own_pid not in found

    def test_falls_back_to_ps_when_proc_absent(self):
        """With no /proc directory, ps is run exactly once and its output parsed."""
        ps_result = MagicMock(
            returncode=0,
            stdout=f"12345 {_GATEWAY_CMD}\n99999 {_OTHER_CMD}\n",
        )

        with (
            patch("hermes_cli.gateway.is_windows", return_value=False),
            patch("os.path.isdir", return_value=False),
            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
            patch("subprocess.run", return_value=ps_result) as ps_call,
        ):
            found = gateway_mod._scan_gateway_pids(set(), all_profiles=True)

        ps_call.assert_called_once()
        assert 12345 in found

    def test_proc_permission_error_skips_pid(self):
        """An unreadable cmdline is skipped without falling back to ps."""

        def fake_isdir(path):
            return str(path) == "/proc"

        def fake_listdir(path):
            if str(path) != "/proc":
                raise FileNotFoundError
            return ["12345", "self"]

        def fake_open(path, mode="r", **kwargs):
            raise PermissionError("no access")

        with (
            patch("hermes_cli.gateway.is_windows", return_value=False),
            patch("os.path.isdir", side_effect=fake_isdir),
            patch("os.listdir", side_effect=fake_listdir),
            patch("builtins.open", side_effect=fake_open),
            patch("hermes_cli.gateway._get_ancestor_pids", return_value=set()),
            patch("subprocess.run") as ps_call,
        ):
            found = gateway_mod._scan_gateway_pids(set(), all_profiles=True)

        # PermissionError swallowed — empty result, no crash
        assert 12345 not in found
        # /proc dir existed, so ps not called
        ps_call.assert_not_called()
|
||||
Loading…
Add table
Add a link
Reference in a new issue