mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-10 03:22:05 +00:00
PR #21561 migrated liveness probes across 14 call sites from
`os.kill(pid, 0)` to `gateway.status._pid_exists` (psutil-first) so
the gateway doesn't send Ctrl+C to itself on Windows (bpo-14484). A handful of
tests still patched the old `os.kill` seam and either happened to pass
on POSIX (when PID 12345 incidentally wasn't alive on the CI worker) or
failed outright — on CI runs they surfaced as 7 flaky/stable failures.
Migrate each affected test to patch the correct seam:
- tests/tools/test_browser_orphan_reaper.py (5 tests)
Patch `gateway.status._pid_exists` instead of `os.kill`.
Rename test_permission_error_on_kill_check_skips to
test_alive_legacy_daemon_is_reaped — the old assertion was
"PermissionError on sig 0 → skip dir"; post-migration the
untracked-alive-daemon path always reaps the dir after SIGTERM
(best-effort semantics were preserved).
- tests/tools/test_windows_native_support.py (4 tests)
Replace tests that asserted `os.kill` seam behavior with tests
that exercise `ProcessRegistry._is_host_pid_alive` as a
delegator and split out a new TestPidExistsOSErrorWidening class
that hits `gateway.status._pid_exists` directly via the POSIX
fallback branch (so Windows-style `OSError(WinError 87)` + `PermissionError`
widening is still covered on Linux CI).
- tests/tools/test_process_registry.py (1 test)
Mock `psutil.Process` + `_pid_exists` instead of `os.kill`
for the detached-session kill path.
- tests/tools/test_mcp_stability.py::test_kill_orphaned_uses_sigkill_when_available
SIGTERM → alive-check → SIGKILL flow now uses `_pid_exists`
for the middle step; assertion count drops from 3 to 2.
- tests/gateway/test_status.py::TestScopedLocks (2 tests)
`acquire_scoped_lock` consults `_pid_exists`; patch that
seam directly instead of trying to control the nested psutil
call via os.kill monkeypatch.
- tests/hermes_cli/test_gateway.py::test_stop_profile_gateway_keeps_pid_file_when_process_still_running
The stop loop sends one SIGTERM via os.kill then polls 20x via
_pid_exists; instrument both separately. Old assertion
`calls["kill"] == 21` split into `kill == 1` + `alive_probes == 20`.
- tests/hermes_cli/test_auth_toctou_file_modes.py::test_shared_nous_store_writes_0o600_with_0o700_parent
Commit c34884ea2 switched the pytest seat-belt guard in
`_nous_shared_store_path()` from `Path.home() / ".hermes"`
to `get_default_hermes_root()`, which honors HERMES_HOME. The
test sets both HERMES_HOME and HERMES_SHARED_AUTH_DIR to
subpaths of the same tmp_path, and the override now collapses
onto the same path the guard is refusing. Renamed the override
subdirectory so the two paths diverge — guard passes, test runs.
All 21 original CI failures and their local-flaky siblings now pass
(278 tests across the touched files, 0 failures).
202 lines
7.7 KiB
Python
202 lines
7.7 KiB
Python
"""Regression tests for TOCTOU-safe credential file writers in ``hermes_cli.auth``.

Background
==========
The three writers exercised below used to create a temp file via
``Path.write_text`` / ``Path.open('w')`` and only ``chmod``'d it to ``0o600``
afterward. Between create and chmod the file existed at the process umask
(typically ``0o644``), briefly exposing OAuth tokens to other local users on
multi-user hosts. The fix switches them to ``os.open(O_EXCL, mode=0o600)`` +
``os.fdopen`` + ``fsync`` so the file is created at ``0o600`` atomically.
Mirrors the fixes shipped for ``agent/google_oauth.py`` (#19673) and
``tools/mcp_oauth.py`` (#21148).

These tests stay green only while the token file and its parent directory
end up at ``0o600`` / ``0o700`` after every write. POSIX-only — the mode-bit
enforcement does not exist on Windows.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
import os
|
|
import stat
|
|
import sys
|
|
from unittest.mock import patch
|
|
|
|
import pytest
|
|
|
|
|
|
# Module-wide marker: every test in this file asserts on POSIX permission
# bits, so the whole module is skipped when running on Windows.
pytestmark = pytest.mark.skipif(
    sys.platform.startswith("win"),
    reason="POSIX mode bits not enforced on Windows",
)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _save_auth_store (~/.hermes/auth.json — every native OAuth provider)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_save_auth_store_writes_0o600_with_0o700_parent(tmp_path, monkeypatch):
    """Check the permission bits left behind by ``_save_auth_store``.

    ``HERMES_HOME`` is redirected into ``tmp_path`` and the writer runs under
    a ``0o022`` umask. The returned path must be ``0o600``, its parent
    directory ``0o700``, and the stored access token must read back intact.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # Permissive umask: a writer that leans on the umask instead of an
    # explicit mode would leave visibly wrong bits behind.
    previous_umask = os.umask(0o022)
    try:
        from hermes_cli import auth as auth_mod

        auth_path = auth_mod._save_auth_store(
            {
                "version": auth_mod.AUTH_STORE_VERSION,
                "providers": {"openai-codex": {"tokens": {"access_token": "secret-x"}}},
                "active_provider": "openai-codex",
            }
        )
    finally:
        # Always restore the umask so later tests are unaffected.
        os.umask(previous_umask)

    # Stat the file first, then its directory, before asserting on either.
    mode, parent_mode = (
        stat.S_IMODE(target.stat().st_mode)
        for target in (auth_path, auth_path.parent)
    )

    assert mode == 0o600, (
        f"auth.json mode 0o{mode:o} != 0o600 — TOCTOU race regressed"
    )
    assert parent_mode == 0o700, (
        f"auth.json parent dir mode 0o{parent_mode:o} != 0o700 — siblings can traverse"
    )

    # The payload must have survived the rewrite.
    persisted = json.loads(auth_path.read_text())
    stored_token = persisted["providers"]["openai-codex"]["tokens"]["access_token"]
    assert stored_token == "secret-x"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# _save_qwen_cli_tokens (Qwen CLI OAuth tokens)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_save_qwen_cli_tokens_writes_0o600_with_0o700_parent(tmp_path, monkeypatch):
    """Check the permission bits left behind by ``_save_qwen_cli_tokens``.

    ``HERMES_HOME`` and ``HOME`` are both redirected into ``tmp_path`` and the
    writer runs under a ``0o022`` umask. The returned path must be ``0o600``,
    its parent directory ``0o700``, and the access token must read back intact.
    """
    # HOME is redirected as well because the Qwen CLI auth path lives under
    # $HOME/.qwen by default — keep it out of the real home directory.
    for env_name in ("HERMES_HOME", "HOME"):
        monkeypatch.setenv(env_name, str(tmp_path))

    previous_umask = os.umask(0o022)
    try:
        from hermes_cli import auth as auth_mod

        auth_path = auth_mod._save_qwen_cli_tokens(
            {
                "access_token": "qwen-secret",
                "refresh_token": "qwen-refresh",
                "token_type": "Bearer",
                "expiry_date": 123,
            }
        )
    finally:
        # Always restore the umask so later tests are unaffected.
        os.umask(previous_umask)

    file_stat = auth_path.stat()
    dir_stat = auth_path.parent.stat()
    mode = stat.S_IMODE(file_stat.st_mode)
    parent_mode = stat.S_IMODE(dir_stat.st_mode)

    assert mode == 0o600, (
        f"Qwen token file mode 0o{mode:o} != 0o600 — TOCTOU race regressed"
    )
    assert parent_mode == 0o700, (
        f"Qwen token parent dir mode 0o{parent_mode:o} != 0o700"
    )

    persisted = json.loads(auth_path.read_text())
    assert persisted["access_token"] == "qwen-secret"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Nous shared-credential store write (inside _write_shared_nous_state)
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_shared_nous_store_writes_0o600_with_0o700_parent(tmp_path, monkeypatch):
    """Check the permission bits left behind by ``_write_shared_nous_state``.

    The state is written under a ``0o022`` umask with the shared store
    redirected into ``tmp_path``. The file reported by
    ``_nous_shared_store_path`` must exist at ``0o600``, its parent directory
    must be ``0o700``, and the refresh token must read back intact.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    # _nous_shared_store_path() refuses to touch the real shared store while
    # pytest is running, so point it into tmp_path explicitly. The override
    # uses its own subdirectory name (``shared_override``) because the guard's
    # "real user home" reference currently follows HERMES_HOME via
    # get_default_hermes_root(); a distinct name keeps the two paths from
    # colliding and the guard from wrongly concluding that we are writing to
    # the real user's shared store.
    shared_dir = tmp_path / "shared_override"
    monkeypatch.setenv("HERMES_SHARED_AUTH_DIR", str(shared_dir))

    previous_umask = os.umask(0o022)
    try:
        from hermes_cli import auth as auth_mod

        auth_mod._write_shared_nous_state(
            {
                "access_token": "nous-access-xxx",
                "refresh_token": "nous-refresh-xxx",
                "token_type": "Bearer",
                "scope": "openid profile",
                "client_id": "test-client",
                "obtained_at": "2026-01-01T00:00:00Z",
                "expires_at": "2026-01-01T01:00:00Z",
            }
        )
        path = auth_mod._nous_shared_store_path()
    finally:
        # Always restore the umask so later tests are unaffected.
        os.umask(previous_umask)

    assert path.exists(), "shared Nous store was not written"

    mode = stat.S_IMODE(path.stat().st_mode)
    parent_mode = stat.S_IMODE(path.parent.stat().st_mode)

    assert mode == 0o600, (
        f"Nous shared store mode 0o{mode:o} != 0o600 — TOCTOU race regressed"
    )
    assert parent_mode == 0o700, (
        f"Nous shared store parent dir mode 0o{parent_mode:o} != 0o700"
    )

    persisted = json.loads(path.read_text())
    assert persisted["refresh_token"] == "nous-refresh-xxx"
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Atomicity: verify ``os.open`` is called with an explicit 0o600 mode.
|
|
# ---------------------------------------------------------------------------
|
|
|
|
|
|
def test_save_auth_store_uses_os_open_with_0o600_mode(tmp_path, monkeypatch):
    """Verify ``_save_auth_store`` creates its temp file via a restricted ``os.open``.

    ``os.open`` is wrapped with a recording spy while the store is saved.
    Every recorded call whose path contains ``auth.json.tmp`` must carry
    ``O_CREAT`` and ``O_EXCL`` and a mode of exactly ``0o600``, so the file
    is created with restricted permissions rather than being chmod'd after
    the write.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    observed_opens: list[tuple[str, int, int]] = []
    original_open = os.open

    def recording_open(path, flags, mode=0o777, *args, **kwargs):
        # Record the call, then delegate to the real os.open unchanged.
        observed_opens.append((str(path), flags, mode))
        return original_open(path, flags, mode, *args, **kwargs)

    with patch.object(os, "open", recording_open):
        from hermes_cli import auth as auth_mod

        empty_store = {"version": auth_mod.AUTH_STORE_VERSION, "providers": {}}
        auth_mod._save_auth_store(empty_store)

    # Only the opens that target the auth.json temp file are of interest.
    auth_tmp_opens = [
        record for record in observed_opens if "auth.json.tmp" in record[0]
    ]
    assert auth_tmp_opens, (
        f"os.open was never called for the auth.json temp file; "
        f"observed={observed_opens!r}"
    )

    # Must be exactly S_IRUSR | S_IWUSR (0o600) — no group/other bits.
    expected = stat.S_IRUSR | stat.S_IWUSR
    for path, flags, mode in auth_tmp_opens:
        assert flags & os.O_CREAT, f"auth.json temp open missing O_CREAT: path={path}"
        assert flags & os.O_EXCL, (
            f"auth.json temp open missing O_EXCL — TOCTOU-safe pattern regressed: "
            f"path={path}, flags={flags}"
        )
        assert mode == expected, (
            f"auth.json temp open mode 0o{mode:o} != 0o{expected:o} — "
            f"umask would apply and potentially expose tokens"
        )
|