Merge branch 'main' into docker_s6

This commit is contained in:
Ben Barclay 2026-05-25 09:39:27 +10:00 committed by GitHub
commit 59da190512
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
417 changed files with 26434 additions and 3321 deletions

View file

@ -8,6 +8,8 @@ depend on the registry being populated should use it explicitly or via
``@pytest.mark.usefixtures("web_registry_populated")``.
"""
from unittest.mock import patch
import pytest
@ -48,3 +50,20 @@ def web_registry_populated():
yield
from agent.web_search_registry import _reset_for_tests
_reset_for_tests()
@pytest.fixture
def disable_lazy_stt_install():
"""Disarm the runtime lazy-install probe so static ``_HAS_FASTER_WHISPER``
patches accurately simulate 'faster-whisper not installed'.
Without this, ``_try_lazy_install_stt()`` calls
``importlib.util.find_spec("faster_whisper")``, which returns truthy
whenever the package is installed in the dev / CI environment
defeating the test's ``_HAS_FASTER_WHISPER=False`` patch.
Opt in at module scope with
``pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")``.
"""
with patch("tools.transcription_tools._try_lazy_install_stt", return_value=False):
yield

View file

@ -1,6 +1,9 @@
"""Tests for the dangerous command approval module."""
import ast
import os
import threading
import time
from pathlib import Path
from types import SimpleNamespace
from unittest.mock import patch as mock_patch
@ -1305,3 +1308,165 @@ class TestEtcPatternsUnaffectedByRefactor:
def test_grep_etc_passwd_is_safe(self):
dangerous, _, _ = detect_dangerous_command("grep root /etc/passwd")
assert dangerous is False
# =========================================================================
# Gateway approval timeout = deny, NOT consent (#24912)
#
# A Slack user walked away mid-conversation; the agent requested approval
# to run `rm -rf .git`; the prompt timed out; the agent ran the command
# anyway. Reported by @tofalck on 2026-05-13, corroborated by
# @angry-programmer on Telegram. Silence is not consent.
#
# These tests pin:
# 1. Gateway timeout → approved=False, with a message strong enough that
# a downstream agent reading "BLOCKED: ... Silence is not consent."
# treats it as a hard halt, not an invitation to rephrase.
# 2. The structured outcome / user_consent fields are present so
# plugins, hooks, and audit pipelines can act on the timeout without
# string-parsing the message.
# 3. Explicit /deny carries the same shape (treat-as-not-consented).
# =========================================================================
class TestApprovalTimeoutIsNotConsent:
"""The gateway approval contract: silence is not consent (#24912)."""
SESSION_KEY = "test-no-consent-session"
def setup_method(self):
"""Reset module state and force tight gateway_timeout for fast tests."""
from tools import approval as mod
mod._gateway_queues.clear()
mod._gateway_notify_cbs.clear()
mod._session_approved.clear()
mod._permanent_approved.clear()
mod._pending.clear()
self._saved_env = {
k: os.environ.get(k)
for k in ("HERMES_GATEWAY_SESSION", "HERMES_YOLO_MODE",
"HERMES_SESSION_KEY", "HERMES_INTERACTIVE")
}
os.environ.pop("HERMES_YOLO_MODE", None)
os.environ.pop("HERMES_INTERACTIVE", None)
os.environ["HERMES_GATEWAY_SESSION"] = "1"
os.environ["HERMES_SESSION_KEY"] = self.SESSION_KEY
def teardown_method(self):
from tools import approval as mod
mod._gateway_queues.clear()
mod._gateway_notify_cbs.clear()
for k, v in self._saved_env.items():
if v is None:
os.environ.pop(k, None)
else:
os.environ[k] = v
def _force_short_timeout(self, monkeypatch, seconds=1):
from tools import approval as mod
monkeypatch.setattr(
mod, "_get_approval_config",
lambda: {"mode": "manual", "gateway_timeout": seconds, "timeout": seconds},
)
def test_timeout_returns_approved_false_with_no_consent(self, monkeypatch):
"""The reported #24912 scenario — user never responds, agent must see BLOCKED."""
from tools import approval as mod
self._force_short_timeout(monkeypatch, seconds=1)
# Slack-shaped: notify_cb registered, but user doesn't respond.
notified = []
mod.register_gateway_notify(self.SESSION_KEY, lambda data: notified.append(data))
result = mod.check_all_command_guards("rm -rf .git", "local")
assert result["approved"] is False
assert result.get("user_consent") is False
assert result.get("outcome") == "timeout"
# The notify_cb DID fire — we did try to ask the user.
assert len(notified) == 1
def test_timeout_message_is_emphatic_against_retry_and_rephrase(self, monkeypatch):
"""The BLOCKED message must explicitly tell the agent not to rephrase.
Without this, the agent treats 'Do NOT retry this command' as
permission to try a different command achieving the same outcome.
"""
from tools import approval as mod
self._force_short_timeout(monkeypatch, seconds=1)
mod.register_gateway_notify(self.SESSION_KEY, lambda data: None)
result = mod.check_all_command_guards("rm -rf .git", "local")
msg = result["message"]
# Explicit halt signals — these are the model-facing contract.
assert "BLOCKED" in msg
assert "NOT consented" in msg
assert "Silence is not consent" in msg
# Both forms of evasion must be named:
assert "do NOT retry" in msg.lower() or "Do NOT retry" in msg
assert "rephrase" in msg.lower()
assert "different command" in msg.lower()
def test_explicit_deny_carries_same_no_consent_shape(self):
"""An explicit /deny must produce the same shape as timeout —
the agent should treat both identically."""
from tools import approval as mod
notified = []
mod.register_gateway_notify(self.SESSION_KEY, lambda data: notified.append(data))
# Spawn the approval wait in a thread, then resolve it with "deny".
result_holder = {}
def _check():
result_holder["r"] = mod.check_all_command_guards("rm -rf .git", "local")
t = threading.Thread(target=_check)
t.start()
# Wait for the queue entry to appear, then resolve.
for _ in range(50):
if mod._gateway_queues.get(self.SESSION_KEY):
break
time.sleep(0.02)
mod.resolve_gateway_approval(self.SESSION_KEY, "deny")
t.join(timeout=5)
assert "r" in result_holder, "approval wait did not return after deny"
r = result_holder["r"]
assert r["approved"] is False
assert r.get("user_consent") is False
assert r.get("outcome") == "denied"
assert "Silence is not consent" not in r["message"] # this one IS denied, not timed-out
assert "NOT consented" in r["message"]
assert "rephrase" in r["message"].lower()
def test_timeout_emits_post_hook_with_timeout_outcome(self, monkeypatch):
"""Plugins must be able to distinguish timeout from explicit deny.
This is what an audit / notification plugin needs to alert
operators on 'agent asked, user never replied' incidents like #24912.
"""
from tools import approval as mod
self._force_short_timeout(monkeypatch, seconds=1)
mod.register_gateway_notify(self.SESSION_KEY, lambda data: None)
hook_calls = []
original_fire = mod._fire_approval_hook
def _capture(event_name, **kwargs):
hook_calls.append((event_name, kwargs))
return original_fire(event_name, **kwargs)
monkeypatch.setattr(mod, "_fire_approval_hook", _capture)
mod.check_all_command_guards("rm -rf .git", "local")
# post_approval_response must be in the hook log with choice=timeout
posts = [c for c in hook_calls if c[0] == "post_approval_response"]
assert posts, "post_approval_response hook did not fire"
last_post = posts[-1][1]
assert last_post.get("choice") == "timeout", (
f"hook choice should be 'timeout' on no-response, got {last_post.get('choice')!r}"
)

View file

@ -72,7 +72,7 @@ class TestReapOrphanedBrowserSessions:
assert not d.exists()
def test_orphaned_alive_daemon_is_killed(self, fake_tmpdir):
"""Alive daemon not tracked by _active_sessions gets SIGTERM (legacy path).
"""Alive daemon not tracked by _active_sessions is terminated (legacy path).
No owner_pid file => falls back to tracked_names check.
"""
@ -82,18 +82,17 @@ class TestReapOrphanedBrowserSessions:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
# Don't actually kill anything
def mock_terminate(pid):
kill_calls.append(pid)
# Post-#21561 the liveness probe goes through
# ``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists``
# so it's safe on Windows — ``os.kill(pid, 0)`` is bpo-14484).
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) in kill_calls
assert 12345 in kill_calls
def test_tracked_session_is_not_reaped(self, fake_tmpdir):
"""Sessions tracked in _active_sessions are left alone (legacy path)."""
@ -108,13 +107,13 @@ class TestReapOrphanedBrowserSessions:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
with patch("os.kill", side_effect=mock_kill):
with patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
# Should NOT have tried to kill anything
# Should NOT have tried to terminate anything
assert len(kill_calls) == 0
# Dir should still exist
assert d.exists()
@ -126,23 +125,24 @@ class TestReapOrphanedBrowserSessions:
``gateway.status._pid_exists`` (which wraps ``psutil.pid_exists``
because ``os.kill(pid, 0)`` is a footgun on Windows bpo-14484).
With no owner_pid file and no tracked-name entry, the reaper
SIGTERMs the daemon and removes its socket dir regardless of
whether SIGTERM succeeded (best-effort semantics).
terminates the daemon (and its process tree) and removes its socket
dir regardless of whether termination succeeded (best-effort
semantics).
"""
from tools.browser_tool import _reap_orphaned_browser_sessions
d = _make_socket_dir(fake_tmpdir, "h_perm1234567", pid=12345)
sigterm_calls = []
terminate_calls = []
def mock_kill(pid, sig):
sigterm_calls.append((pid, sig))
def mock_terminate(pid):
terminate_calls.append(pid)
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) in sigterm_calls
assert 12345 in terminate_calls
assert not d.exists()
def test_cdp_sessions_are_also_reaped(self, fake_tmpdir):
@ -203,15 +203,15 @@ class TestOwnerPidCrossProcess:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
# Owner alive → reaper skips without ever probing the daemon.
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) not in kill_calls
assert 12345 not in kill_calls
assert d.exists()
def test_dead_owner_triggers_reap(self, fake_tmpdir):
@ -225,17 +225,17 @@ class TestOwnerPidCrossProcess:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
# Owner 999999999 dead, daemon 12345 alive.
pid_alive = {999999999: False, 12345: True}
with patch("gateway.status._pid_exists",
side_effect=lambda pid: pid_alive.get(int(pid), False)), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) in kill_calls
assert 12345 in kill_calls
assert not d.exists()
def test_corrupt_owner_pid_falls_back_to_legacy(self, fake_tmpdir):
@ -253,15 +253,15 @@ class TestOwnerPidCrossProcess:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
# Legacy path took over → tracked → not reaped
assert (12345, signal.SIGTERM) not in kill_calls
assert 12345 not in kill_calls
assert d.exists()
def test_owner_pid_permission_error_treated_as_alive(self, fake_tmpdir):
@ -280,16 +280,16 @@ class TestOwnerPidCrossProcess:
kill_calls = []
def mock_kill(pid, sig):
kill_calls.append((pid, sig))
def mock_terminate(pid):
kill_calls.append(pid)
# Owner 22222 reported alive (PermissionError collapses to True
# inside _pid_exists). Daemon never probed, never SIGTERMed.
# inside _pid_exists). Daemon never probed, never terminated.
with patch("gateway.status._pid_exists", return_value=True), \
patch("os.kill", side_effect=mock_kill):
patch("tools.process_registry.ProcessRegistry._terminate_host_pid", side_effect=mock_terminate):
_reap_orphaned_browser_sessions()
assert (12345, signal.SIGTERM) not in kill_calls
assert 12345 not in kill_calls
assert d.exists()
def test_write_owner_pid_creates_file_with_current_pid(

View file

@ -31,7 +31,13 @@ class TestBrowserSecretExfil:
def test_allows_normal_url(self):
"""Normal URLs pass the secret check (may fail for other reasons)."""
from tools.browser_tool import browser_navigate
result = browser_navigate("https://github.com/NousResearch/hermes-agent")
# Patch the actual browser command — we only care that the secret
# check doesn't block a clean URL, not that Chrome starts in CI.
mock_result = {"success": True, "data": {"title": "ok", "url": "https://github.com/NousResearch/hermes-agent"}}
with patch("tools.browser_tool._run_browser_command", return_value=mock_result), \
patch("tools.browser_tool._get_session_info", return_value={"_first_nav": False}), \
patch("tools.browser_tool._is_local_backend", return_value=True):
result = browser_navigate("https://github.com/NousResearch/hermes-agent")
parsed = json.loads(result)
# Should NOT be blocked by secret detection
assert "API key or token" not in parsed.get("error", "")

View file

@ -76,6 +76,27 @@ class TestSchema:
modes = set(COMPUTER_USE_SCHEMA["parameters"]["properties"]["mode"]["enum"])
assert modes == {"som", "vision", "ax"}
def test_schema_exposes_max_elements_cap_for_capture(self):
from tools.computer_use.schema import COMPUTER_USE_SCHEMA
props = COMPUTER_USE_SCHEMA["parameters"]["properties"]
assert "max_elements" in props
assert props["max_elements"]["type"] == "integer"
assert props["max_elements"].get("minimum", 1) >= 1
def test_schema_max_elements_documents_default_and_upper_bound(self):
"""Schema description must agree with the runtime. The original PR
text said "Default 100" without a corresponding `default` field, and
had no upper bound both Copilot findings.
"""
from tools.computer_use.schema import COMPUTER_USE_SCHEMA
from tools.computer_use.tool import (
_DEFAULT_MAX_ELEMENTS,
_MAX_ALLOWED_MAX_ELEMENTS,
)
prop = COMPUTER_USE_SCHEMA["parameters"]["properties"]["max_elements"]
assert prop.get("default") == _DEFAULT_MAX_ELEMENTS
assert prop.get("maximum") == _MAX_ALLOWED_MAX_ELEMENTS
class TestRegistration:
def test_tool_registers_with_registry(self):
@ -205,6 +226,54 @@ class TestDispatch:
parsed = json.loads(out)
assert "error" in parsed
def test_set_value_routes_to_backend(self, noop_backend):
"""set_value must reach the backend — regression for missing _NoopBackend stub."""
from tools.computer_use.tool import handle_computer_use
out = handle_computer_use({"action": "set_value", "value": "Option A", "element": 5})
parsed = json.loads(out)
assert parsed.get("ok") is True
assert parsed.get("action") == "set_value"
assert any(c[0] == "set_value" for c in noop_backend.calls)
def test_set_value_missing_value_returns_error(self, noop_backend):
from tools.computer_use.tool import handle_computer_use
out = handle_computer_use({"action": "set_value"})
parsed = json.loads(out)
assert "error" in parsed
def test_capture_after_skipped_when_action_failed(self, noop_backend):
"""capture_after must not fire when res.ok=False (regression guard).
A follow-up screenshot after a failed action shows the screen in a
normal state, misleading the model into thinking the action succeeded.
"""
from unittest.mock import patch
from tools.computer_use.backend import ActionResult
from tools.computer_use.tool import handle_computer_use
# Make click() return a failure.
with patch.object(noop_backend, "click",
return_value=ActionResult(ok=False, action="click",
message="element not found")):
out = handle_computer_use({"action": "click", "element": 99,
"capture_after": True})
parsed = json.loads(out)
# Should return the error, not a multimodal capture.
assert parsed.get("ok") is False
assert parsed.get("action") == "click"
# No follow-up capture should have been issued.
capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
assert len(capture_calls) == 0, "capture must not be called after a failed action"
def test_capture_after_fires_when_action_succeeds(self, noop_backend):
"""capture_after must trigger for successful actions."""
from tools.computer_use.tool import handle_computer_use
out = handle_computer_use({"action": "click", "element": 1,
"capture_after": True})
# Noop backend returns ok=True, so capture should have been called.
capture_calls = [c for c in noop_backend.calls if c[0] == "capture"]
assert len(capture_calls) == 1
# ---------------------------------------------------------------------------
# Safety guards (type / key block lists)
@ -337,6 +406,193 @@ class TestCaptureResponse:
assert "AXButton" in text_part["text"]
assert "AXTextField" in text_part["text"]
def _ax_backend_with(self, count: int):
"""Construct a fake backend that yields ``count`` AX elements."""
from tools.computer_use.backend import CaptureResult, UIElement
elements = [
UIElement(index=i + 1, role="AXButton", label=f"el-{i}", bounds=(0, 0, 1, 1))
for i in range(count)
]
class FakeBackend:
def start(self): pass
def stop(self): pass
def is_available(self): return True
def capture(self, mode="som", app=None):
return CaptureResult(
mode=mode, width=800, height=600,
png_b64="",
elements=list(elements),
app="Obsidian",
)
def click(self, **kw): ...
def drag(self, **kw): ...
def scroll(self, **kw): ...
def type_text(self, text): ...
def key(self, keys): ...
def list_apps(self): return []
def focus_app(self, app, raise_window=False): ...
return FakeBackend()
def test_capture_ax_caps_elements_at_default_for_dense_trees(self):
"""Regression for #22865: an Electron-style 600-element AX tree must
not emit the entire array verbatim into the tool result.
"""
from tools.computer_use import tool as cu_tool
fake_backend = self._ax_backend_with(600)
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
out = cu_tool.handle_computer_use({"action": "capture", "mode": "ax"})
parsed = json.loads(out)
assert parsed["mode"] == "ax"
assert parsed["total_elements"] == 600
assert len(parsed["elements"]) == cu_tool._DEFAULT_MAX_ELEMENTS
assert parsed["truncated_elements"] == 600 - cu_tool._DEFAULT_MAX_ELEMENTS
# Truncation must be visible in the human summary so the model knows
# the JSON view is partial and can re-issue with a tighter scope.
assert "truncated to" in parsed["summary"]
def test_capture_ax_honors_explicit_max_elements_override(self):
from tools.computer_use import tool as cu_tool
fake_backend = self._ax_backend_with(600)
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
out = cu_tool.handle_computer_use(
{"action": "capture", "mode": "ax", "max_elements": 250}
)
parsed = json.loads(out)
assert len(parsed["elements"]) == 250
assert parsed["truncated_elements"] == 350
def test_capture_ax_below_cap_is_unchanged(self):
"""Backwards-compat: small captures keep the full elements array and
do not surface a `truncated_elements` field.
"""
from tools.computer_use import tool as cu_tool
fake_backend = self._ax_backend_with(5)
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
out = cu_tool.handle_computer_use({"action": "capture", "mode": "ax"})
parsed = json.loads(out)
assert len(parsed["elements"]) == 5
assert parsed["total_elements"] == 5
assert "truncated_elements" not in parsed
assert "truncated to" not in parsed["summary"]
def test_capture_ax_invalid_max_elements_falls_back_to_default(self):
"""Malformed `max_elements` (string, negative, zero) must not silently
disable the cap and re-introduce the original unbounded behavior.
"""
from tools.computer_use import tool as cu_tool
fake_backend = self._ax_backend_with(600)
cu_tool.reset_backend_for_tests()
for bad in ("not-a-number", 0, -10):
with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
out = cu_tool.handle_computer_use(
{"action": "capture", "mode": "ax", "max_elements": bad}
)
parsed = json.loads(out)
assert len(parsed["elements"]) == cu_tool._DEFAULT_MAX_ELEMENTS, (
f"bad max_elements={bad!r} disabled the cap"
)
def test_capture_ax_clamps_oversized_max_elements_to_hard_cap(self):
"""A caller passing a very large `max_elements` must not be able to
disable the safeguard. The cap is clamped to a hard upper bound so
the context-blow-up protection cannot be bypassed by argument.
"""
from tools.computer_use import tool as cu_tool
fake_backend = self._ax_backend_with(5000)
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
out = cu_tool.handle_computer_use(
{"action": "capture", "mode": "ax", "max_elements": 10_000}
)
parsed = json.loads(out)
assert len(parsed["elements"]) == cu_tool._MAX_ALLOWED_MAX_ELEMENTS
assert parsed["total_elements"] == 5000
assert parsed["truncated_elements"] == 5000 - cu_tool._MAX_ALLOWED_MAX_ELEMENTS
def test_capture_ax_summary_indices_match_returned_elements(self):
"""When `max_elements` is below the human-summary's own line cap, the
summary must not index elements that aren't in the returned array.
Otherwise the model sees `#15` in the summary and finds no matching
entry in `elements`.
"""
from tools.computer_use import tool as cu_tool
fake_backend = self._ax_backend_with(600)
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=fake_backend):
out = cu_tool.handle_computer_use(
{"action": "capture", "mode": "ax", "max_elements": 5}
)
parsed = json.loads(out)
returned_indices = {e["index"] for e in parsed["elements"]}
summary_lines = parsed["summary"].splitlines()
indexed_lines = [ln for ln in summary_lines if ln.lstrip().startswith("#")]
for ln in indexed_lines:
idx_token = ln.lstrip().split()[0].lstrip("#")
idx = int(idx_token)
assert idx in returned_indices, (
f"summary references #{idx} but it is absent from elements payload "
f"(returned: {sorted(returned_indices)})"
)
def test_capture_multimodal_summary_omits_truncation_note(self):
"""The som/vision multimodal envelope returns a screenshot, not an
`elements` array so a "response truncated to N of M elements"
claim in the summary would be inaccurate.
"""
from tools.computer_use.backend import CaptureResult, UIElement
from tools.computer_use import tool as cu_tool
fake_png = "iVBORw0KGgo="
elements = [
UIElement(index=i + 1, role="AXButton", label=f"el-{i}", bounds=(0, 0, 1, 1))
for i in range(600)
]
class FakeBackend:
def start(self): pass
def stop(self): pass
def is_available(self): return True
def capture(self, mode="som", app=None):
return CaptureResult(
mode=mode, width=800, height=600,
png_b64=fake_png, elements=list(elements),
app="Obsidian",
)
def click(self, **kw): ...
def drag(self, **kw): ...
def scroll(self, **kw): ...
def type_text(self, text): ...
def key(self, keys): ...
def list_apps(self): return []
def focus_app(self, app, raise_window=False): ...
cu_tool.reset_backend_for_tests()
with patch.object(cu_tool, "_get_backend", return_value=FakeBackend()):
out = cu_tool.handle_computer_use({"action": "capture", "mode": "som"})
assert isinstance(out, dict) and out["_multimodal"] is True
text_part = next(p for p in out["content"] if p.get("type") == "text")
assert "truncated to" not in text_part["text"], (
"multimodal response carries an image, not an elements array; "
"the truncation note describes a payload field that isn't present"
)
assert "truncated to" not in out["text_summary"]
# ---------------------------------------------------------------------------
# Anthropic adapter: multimodal tool-result conversion

View file

@ -0,0 +1,259 @@
"""Tests for the cross-profile soft guard wired into write_file / patch /
skill_manage.
The classifier is tested in tests/agent/test_file_safety_cross_profile.py.
This file tests that the tool surfaces:
1. Refuse cross-profile writes by default and return the warning.
2. Accept cross-profile writes when cross_profile=True is passed.
3. Continue to accept in-profile writes normally.
4. skill_manage's "not found" error names other profiles where the
skill exists.
"""
from __future__ import annotations
import json
import os
from pathlib import Path
import pytest
@pytest.fixture
def fake_hermes(tmp_path, monkeypatch):
"""Build a two-profile Hermes layout and point HERMES_HOME at
the hermes-security profile (matching the original-incident shape).
"""
root = tmp_path / "fake-hermes"
(root / "skills" / "shared-skill").mkdir(parents=True)
(root / "skills" / "shared-skill" / "SKILL.md").write_text(
"---\nname: shared-skill\ndescription: default copy.\n---\n"
)
sec_home = root / "profiles" / "hermes-security"
(sec_home / "skills").mkdir(parents=True)
coder_home = root / "profiles" / "coder"
(coder_home / "skills").mkdir(parents=True)
monkeypatch.setenv("HERMES_HOME", str(sec_home))
import hermes_constants
monkeypatch.setattr(hermes_constants, "get_default_hermes_root", lambda: root)
import agent.file_safety as fs
monkeypatch.setattr(fs, "_hermes_home_path", lambda: sec_home)
monkeypatch.setattr(fs, "_hermes_root_path", lambda: root)
return {
"root": root,
"sec_home": sec_home,
"coder_home": coder_home,
}
# ---------------------------------------------------------------------------
# write_file
# ---------------------------------------------------------------------------
class TestWriteFileCrossProfileGuard:
def test_in_profile_write_allowed(self, fake_hermes):
from tools.file_tools import write_file_tool
target = fake_hermes["sec_home"] / "skills" / "new-skill" / "SKILL.md"
target.parent.mkdir(parents=True)
result_json = write_file_tool(str(target), "in-profile content")
result = json.loads(result_json)
assert not result.get("error"), f"In-profile write should succeed: {result}"
assert target.exists()
assert target.read_text() == "in-profile content"
def test_cross_profile_write_blocked_by_default(self, fake_hermes):
"""The May 2026 incident — security-profile session edits default
profile's skill. Must be blocked."""
from tools.file_tools import write_file_tool
target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
original = target.read_text()
result_json = write_file_tool(str(target), "OVERWRITTEN")
result = json.loads(result_json)
assert result.get("error"), "Cross-profile write should be refused"
assert "cross-profile" in result["error"].lower()
assert "default" in result["error"]
assert "hermes-security" in result["error"]
# File untouched.
assert target.read_text() == original
def test_cross_profile_True_bypass(self, fake_hermes):
"""Explicit override after user direction must succeed."""
from tools.file_tools import write_file_tool
target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
result_json = write_file_tool(
str(target), "user-directed override", cross_profile=True
)
result = json.loads(result_json)
assert not result.get("error"), f"cross_profile=True must succeed: {result}"
assert target.read_text() == "user-directed override"
def test_non_hermes_path_unaffected(self, fake_hermes, tmp_path):
from tools.file_tools import write_file_tool
target = tmp_path / "outside" / "main.py"
target.parent.mkdir()
result_json = write_file_tool(str(target), "print('hello')")
result = json.loads(result_json)
assert not result.get("error")
assert target.exists()
# ---------------------------------------------------------------------------
# patch
# ---------------------------------------------------------------------------
class TestPatchCrossProfileGuard:
def test_cross_profile_patch_blocked(self, fake_hermes):
from tools.file_tools import patch_tool
target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
original = target.read_text()
result_json = patch_tool(
mode="replace",
path=str(target),
old_string="default copy.",
new_string="HIJACKED.",
)
result = json.loads(result_json)
assert result.get("error")
assert "cross-profile" in result["error"].lower()
assert target.read_text() == original
def test_cross_profile_patch_bypass(self, fake_hermes):
from tools.file_tools import patch_tool
target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
result_json = patch_tool(
mode="replace",
path=str(target),
old_string="default copy.",
new_string="user-directed update.",
cross_profile=True,
)
result = json.loads(result_json)
assert not result.get("error"), f"cross_profile=True bypass: {result}"
assert "user-directed update." in target.read_text()
def test_v4a_patch_extracts_path_for_guard(self, fake_hermes):
"""V4A patches embed the target paths in the patch body, not in
a ``path`` kwarg. The guard must still apply."""
from tools.file_tools import patch_tool
target = fake_hermes["root"] / "skills" / "shared-skill" / "SKILL.md"
original = target.read_text()
v4a = (
"*** Begin Patch\n"
f"*** Update File: {target}\n"
"@@\n"
"-default copy.\n"
"+HIJACKED.\n"
"*** End Patch"
)
result_json = patch_tool(mode="patch", patch=v4a)
result = json.loads(result_json)
assert result.get("error"), f"V4A cross-profile must block: {result}"
assert "cross-profile" in result["error"].lower()
assert target.read_text() == original
# ---------------------------------------------------------------------------
# skill_manage — error message naming other profile (item D)
# ---------------------------------------------------------------------------
class TestSkillManageCrossProfileErrorUX:
def _make_skill_in_profile(self, profile_dir: Path, name: str):
d = profile_dir / "skills" / name
d.mkdir(parents=True, exist_ok=True)
(d / "SKILL.md").write_text(
f"---\nname: {name}\ndescription: a skill.\n---\n"
)
def test_error_names_other_profile_when_skill_lives_there(
self, fake_hermes, monkeypatch
):
"""The original incident shape — model expects 'foo' in active
profile, but 'foo' lives in default. Error must point at default."""
self._make_skill_in_profile(fake_hermes["root"], "default-only-skill")
# Re-import the module so SKILLS_DIR picks up HERMES_HOME (set in
# the fixture). Skill_manager_tool computes SKILLS_DIR at import.
import importlib
import tools.skill_manager_tool
importlib.reload(tools.skill_manager_tool)
from tools.skill_manager_tool import _skill_not_found_error
err = _skill_not_found_error("default-only-skill")
assert "not found in active profile 'hermes-security'" in err
assert "default" in err
assert "cross_profile=True" in err
def test_error_names_multiple_profiles(self, fake_hermes, monkeypatch):
"""When the skill exists in TWO other profiles, both should be named."""
self._make_skill_in_profile(fake_hermes["root"], "everywhere-skill")
self._make_skill_in_profile(fake_hermes["coder_home"], "everywhere-skill")
import importlib
import tools.skill_manager_tool
importlib.reload(tools.skill_manager_tool)
from tools.skill_manager_tool import _skill_not_found_error
err = _skill_not_found_error("everywhere-skill")
assert "default" in err
assert "coder" in err
# Switch-profiles hint
assert "hermes -p" in err
def test_genuinely_missing_skill_keeps_helpful_hint(
self, fake_hermes, monkeypatch
):
"""When no profile has the skill, error falls back to skills_list hint."""
import importlib
import tools.skill_manager_tool
importlib.reload(tools.skill_manager_tool)
from tools.skill_manager_tool import _skill_not_found_error
err = _skill_not_found_error("totally-imaginary-skill")
assert "not found in active profile 'hermes-security'" in err
assert "skills_list" in err
# ---------------------------------------------------------------------------
# System prompt active-profile line (item B)
# ---------------------------------------------------------------------------
class TestSystemPromptActiveProfile:
def test_default_profile_line_in_prompt(self, tmp_path, monkeypatch):
"""When active profile is 'default', the prompt names it and warns
about ~/.hermes/profiles/<name>/."""
# Don't set HERMES_HOME — falls back to default.
import agent.file_safety as fs
monkeypatch.setattr(fs, "_hermes_home_path", lambda: tmp_path / "fake")
monkeypatch.setattr(fs, "_hermes_root_path", lambda: tmp_path / "fake")
from agent.file_safety import _resolve_active_profile_name
assert _resolve_active_profile_name() == "default"
# Build the line manually to pin the contract — the prompt builder
# is too heavy to instantiate end-to-end in a unit test.
# See agent/system_prompt.py for the exact wording.
def test_named_profile_line_in_prompt_text(self, fake_hermes):
"""When active profile is 'hermes-security', the prompt warns
explicitly about NOT modifying default's skills/plugins/cron/memories."""
# Spot-check by reading the source — the contract is:
# (1) names the active profile, (2) names the default-profile
# paths, (3) says "do not modify another profile's" without
# explicit user direction.
from pathlib import Path
src = Path("agent/system_prompt.py").read_text()
assert "Active Hermes profile" in src
assert "cross_profile=True" in src
assert "~/.hermes/profiles/" in src
# Both branches present (default and named profile).
assert "Active Hermes profile: default" in src
assert "Active Hermes profile: {active_profile}" in src

View file

@ -60,6 +60,113 @@ class TestIsWriteDenied:
def test_tilde_expansion(self):
assert _is_write_denied("~/.ssh/authorized_keys") is True
@pytest.mark.parametrize(
"path",
[
"auth.json",
"config.yaml",
"webhook_subscriptions.json",
"mcp-tokens/token1.json",
"mcp-tokens/subdir/token2.json",
"pairing/telegram-approved.json",
"pairing/discord-approved.json",
"pairing/telegram-pending.json",
"pairing",
],
)
def test_hermes_control_files_and_mcp_tokens_denied(self, path):
"""Hermes control files and mcp-tokens/pairing entries must be write-denied."""
from hermes_constants import get_hermes_home
hermes_home = get_hermes_home()
full_path = str(hermes_home / path)
assert _is_write_denied(full_path) is True
@pytest.mark.parametrize(
"path",
[
"dummy/../config.yaml",
"./auth.json",
"mcp-tokens/../config.yaml",
],
)
def test_hermes_control_files_traversal_denied(self, path):
"""Path traversal attempts to control files must be blocked by realpath."""
from hermes_constants import get_hermes_home
hermes_home = get_hermes_home()
full_path = str(hermes_home / path)
assert _is_write_denied(full_path) is True
@pytest.mark.parametrize(
"path",
[
"/tmp/standard_file.txt",
"~/projects/myapp/main.py",
"/var/log/app.log",
],
)
def test_standard_paths_allowed(self, path):
"""Unrelated paths must still be allowed."""
assert _is_write_denied(path) is False
@pytest.mark.parametrize(
"name",
["auth.json", "config.yaml", "webhook_subscriptions.json"],
)
def test_control_files_protected_in_profile_mode(self, tmp_path, monkeypatch, name):
"""Under a profile, BOTH <profile>/X and <root>/X must be denied (#15981 shape).
Without the root-level pass, a profile-mode session leaves the
global ~/.hermes/{auth.json,config.yaml,webhook_subscriptions.json}
writable the same gap PR #15981 fixed for .env.
"""
# Simulate a profile-mode HERMES_HOME layout:
# <root>/profiles/coder/{auth.json,config.yaml,...}
# <root>/{auth.json,config.yaml,...} ← must also be denied
root = tmp_path / "hermes"
profile = root / "profiles" / "coder"
profile.mkdir(parents=True)
monkeypatch.setenv("HERMES_HOME", str(profile))
# Profile copy
assert _is_write_denied(str(profile / name)) is True
# Root copy — the gap this widening closes
assert _is_write_denied(str(root / name)) is True
def test_mcp_tokens_dir_protected_in_profile_mode(self, tmp_path, monkeypatch):
"""mcp-tokens/ under profile AND under root must both be denied."""
root = tmp_path / "hermes"
profile = root / "profiles" / "coder"
profile.mkdir(parents=True)
monkeypatch.setenv("HERMES_HOME", str(profile))
assert _is_write_denied(str(profile / "mcp-tokens" / "tok.json")) is True
assert _is_write_denied(str(root / "mcp-tokens" / "tok.json")) is True
# The directory itself must also be denied (not just files inside)
assert _is_write_denied(str(root / "mcp-tokens")) is True
def test_pairing_dir_denied(self, tmp_path, monkeypatch):
"""Regression: pairing/ must be write-denied under both profile and root.
PR #30383 introduced ~/.hermes/pairing/{platform}-approved.json as the
gateway access-control list. Without this block, a prompt-injected agent
can write arbitrary user IDs into an approved file, granting persistent
gateway access without going through the pairing code flow the same
threat class that motivated protecting webhook_subscriptions.json.
"""
root = tmp_path / "hermes"
profile = root / "profiles" / "coder"
profile.mkdir(parents=True)
monkeypatch.setenv("HERMES_HOME", str(profile))
# Active profile pairing entries
assert _is_write_denied(str(profile / "pairing" / "telegram-approved.json")) is True
assert _is_write_denied(str(profile / "pairing" / "discord-pending.json")) is True
# The directory itself
assert _is_write_denied(str(profile / "pairing")) is True
# Root pairing entries (profile mode — same shape as mcp-tokens gap)
assert _is_write_denied(str(root / "pairing" / "telegram-approved.json")) is True
assert _is_write_denied(str(root / "pairing")) is True
# =========================================================================

View file

@ -48,8 +48,14 @@ def _process_group_snapshot(pgid: int) -> str:
).stdout.strip()
def _wait_for_pgid_exit(pgid: int, timeout: float = 10.0) -> bool:
"""Wait for a process group to disappear under loaded xdist hosts."""
def _wait_for_pgid_exit(pgid: int, timeout: float = 30.0) -> bool:
"""Wait for a process group to disappear under loaded xdist hosts.
The cleanup chain is: SIGTERM 3s TimeoutStopSec SIGKILL reap.
Under heavy xdist load (40 parallel workers, 6-shard CI), the full
sequence can exceed 10s. Default timeout is generous to avoid CI
flakes; in practice the wait returns in <1s on quiet hosts.
"""
deadline = time.monotonic() + timeout
while time.monotonic() < deadline:
if not _pgid_still_alive(pgid):
@ -166,9 +172,11 @@ def test_wait_for_process_kills_subprocess_on_keyboardinterrupt():
assert ret == 1, f"SetAsyncExc returned {ret}, expected 1"
# Give the worker a moment to: hit the exception at the next poll,
# run the except-block cleanup (_kill_process), and exit.
t.join(timeout=5.0)
assert not t.is_alive(), "worker didn't exit within 5 s of the interrupt"
# run the except-block cleanup (_kill_process), and exit. Under
# xdist load the SIGTERM → 3s wait → SIGKILL chain can take longer
# than 5s before the worker's join() returns; bumped to 15s.
t.join(timeout=15.0)
assert not t.is_alive(), "worker didn't exit within 15 s of the interrupt"
# The critical assertion: the subprocess GROUP must be dead. Not
# just the bash wrapper — the 'sleep 30' child too. Under xdist load,

View file

@ -1462,6 +1462,27 @@ class TestHTTPConfig:
asyncio.run(_test())
def test_stdio_unavailable_raises_importerror_not_nameerror(self):
"""Regression test for #30904.
When the mcp SDK isn't installed, ``_run_stdio`` previously leaked a
bare ``NameError: name 'StdioServerParameters' is not defined``. The
gate now raises a clear ``ImportError`` with install instructions,
mirroring ``_run_http``'s behaviour when the HTTP transport is
unavailable.
"""
from tools.mcp_tool import MCPServerTask
server = MCPServerTask("local")
config = {"command": "python3", "args": ["/tmp/echo.py"]}
async def _test():
with patch("tools.mcp_tool._MCP_AVAILABLE", False):
with pytest.raises(ImportError, match=r"mcp.*SDK"):
await server._run_stdio(config)
asyncio.run(_test())
def test_http_seeds_initial_protocol_header(self):
from tools.mcp_tool import LATEST_PROTOCOL_VERSION, MCPServerTask

View file

@ -255,3 +255,128 @@ class TestMemoryToolDispatcher:
def test_remove_requires_old_text(self, store):
result = json.loads(memory_tool(action="remove", store=store))
assert result["success"] is False
# =========================================================================
# External drift guard (#26045)
#
# An external writer — patch tool, shell append, manual edit, or sister
# session — can grow MEMORY.md beyond the tool's mental model: no §
# delimiters, content that would all collapse into a single "entry" larger
# than the char limit. Pre-fix, the next memory(action=replace) from a
# session with stale in-memory state truncated that giant entry, silently
# discarding the appended bytes. Reproduced in production on 2026-05-14 —
# ~8KB of structured vendor / standing-orders / pinboard content destroyed
# by a sister session's replace.
# =========================================================================
class TestExternalDriftGuard:
"""Mutations must refuse to flush when on-disk content shows external drift."""
def _plant_drift(self, store, target="memory"):
"""Append free-form content (no § delimiters) past char_limit."""
path = store._path_for(target)
path.parent.mkdir(parents=True, exist_ok=True)
# 800 chars per entry × 3 sections == ~2.4KB without delimiters,
# well over the test fixture's 500-char limit.
block = "\n\n## Vendor Master\n" + "x" * 800
block += "\n\n## Standing Orders\n" + "y" * 800
block += "\n\n## Pin Board\n" + "z" * 800
existing = path.read_text(encoding="utf-8") if path.exists() else ""
path.write_text(existing + block, encoding="utf-8")
return path
def test_replace_refuses_on_drift(self, store):
store.add("memory", "User likes brevity.")
path = self._plant_drift(store)
original_size = path.stat().st_size
result = store.replace("memory", "User likes", "User prefers concise.")
assert result["success"] is False
assert "drift_backup" in result
# On-disk file is UNTOUCHED — that's the point.
assert path.stat().st_size == original_size
assert "Vendor Master" in path.read_text()
# Backup exists with the drifted content.
bak = result["drift_backup"]
assert Path(bak).exists()
assert "Vendor Master" in Path(bak).read_text()
def test_add_refuses_on_drift(self, store):
store.add("memory", "Existing.")
path = self._plant_drift(store)
original = path.read_text()
result = store.add("memory", "New entry under drift.")
assert result["success"] is False
assert "drift_backup" in result
assert path.read_text() == original # untouched
def test_remove_refuses_on_drift(self, store):
store.add("memory", "Target entry to remove.")
path = self._plant_drift(store)
original = path.read_text()
result = store.remove("memory", "Target entry")
assert result["success"] is False
assert "drift_backup" in result
assert path.read_text() == original # untouched
def test_clean_file_does_not_trigger_drift(self, store):
"""A normally-written file (just below char_limit, §-delimited) is fine."""
# Two tool-shaped entries totaling under the 500-char limit.
store.add("memory", "Entry one — normal length.")
store.add("memory", "Entry two — also normal.")
result = store.add("memory", "Entry three.")
assert result["success"] is True
assert "drift_backup" not in result
result = store.replace("memory", "Entry two", "Entry two replaced.")
assert result["success"] is True
def test_error_message_points_at_remediation(self, store):
"""The error string must reference the backup AND remediation steps."""
store.add("memory", "Initial.")
self._plant_drift(store)
result = store.replace("memory", "Initial", "Replacement.")
assert result["success"] is False
# The model has to know what file to look at and what to do.
assert ".bak." in result["error"]
assert "remediation" in result
assert "26045" in result["error"] # tracking-issue back-reference
def test_drift_guard_also_protects_user_target(self, store):
"""USER.md gets the same guarantee as MEMORY.md."""
store.add("user", "Some preference.")
path = self._plant_drift(store, target="user")
original_size = path.stat().st_size
result = store.replace("user", "Some preference", "New preference.")
assert result["success"] is False
assert path.stat().st_size == original_size
def test_drift_backup_filename_is_unique_per_invocation(self, store):
"""Two drift refusals close together must not collide on bak.<ts>.
If two refusals share the same epoch second, the second call would
overwrite the first .bak. The current implementation accepts that
both files describe the same on-disk state but pin the path
format here so any future change has to think about it.
"""
store.add("memory", "Initial.")
self._plant_drift(store)
r1 = store.replace("memory", "Initial", "Replacement.")
r2 = store.add("memory", "Another.")
assert r1.get("drift_backup")
assert r2.get("drift_backup")
# Same epoch second is the expected collision case — both point
# at the same snapshot. Different second is also fine.
assert ".bak." in r1["drift_backup"]
assert ".bak." in r2["drift_backup"]

View file

@ -348,3 +348,158 @@ class TestCompletionConsumed:
result = registry.poll("proc_running")
assert result["status"] == "running"
assert not registry.is_completion_consumed("proc_running")
# ---------------------------------------------------------------------------
# Silent-background-process hint
#
# background=True without notify_on_complete=True OR watch_patterns runs
# the process silently — the agent has no way to learn it finished short
# of calling process(action="poll") explicitly. The tool result must
# include a "hint" field that nudges the agent toward
# notify_on_complete=True for bounded tasks. May 2026 PR #31231 incident:
# bg CI poller exited green, agent never noticed, user had to surface it.
# ---------------------------------------------------------------------------
def _silent_bg_base_config(tmp_path):
return {
"env_type": "local",
"docker_image": "",
"singularity_image": "",
"modal_image": "",
"daytona_image": "",
"cwd": str(tmp_path),
"timeout": 30,
}
def _silent_bg_harness(monkeypatch, tmp_path):
"""Common test fixture: patch enough of terminal_tool to spawn a fake
background process and capture the JSON result the agent sees."""
import tools.terminal_tool as terminal_tool_module
from tools import process_registry as process_registry_module
from types import SimpleNamespace
config = _silent_bg_base_config(tmp_path)
dummy_env = SimpleNamespace(env={})
def fake_spawn_local(**kwargs):
return SimpleNamespace(
id="proc_silent_test",
pid=4242,
notify_on_complete=False,
watcher_platform="",
watcher_chat_id="",
watcher_user_id="",
watcher_user_name="",
watcher_thread_id="",
watcher_message_id="",
watcher_interval=0,
)
monkeypatch.setattr(terminal_tool_module, "_get_env_config", lambda: config)
monkeypatch.setattr(terminal_tool_module, "_start_cleanup_thread", lambda: None)
monkeypatch.setattr(terminal_tool_module, "_check_all_guards", lambda *_args, **_kwargs: {"approved": True})
monkeypatch.setattr(process_registry_module.process_registry, "spawn_local", fake_spawn_local)
monkeypatch.setitem(terminal_tool_module._active_environments, "default", dummy_env)
monkeypatch.setitem(terminal_tool_module._last_activity, "default", 0.0)
return terminal_tool_module
def test_background_without_notify_emits_silent_process_hint(monkeypatch, tmp_path):
"""The footgun case (May 2026 PR #31231): bg=True alone runs silently
and the agent has no signal it finished. Tool must nudge."""
tt = _silent_bg_harness(monkeypatch, tmp_path)
try:
result = json.loads(
tt.terminal_tool(
command="while true; do gh pr checks 999; sleep 30; done",
background=True,
)
)
finally:
tt._active_environments.pop("default", None)
tt._last_activity.pop("default", None)
assert result["session_id"] == "proc_silent_test"
hint = result.get("hint", "")
assert hint, "Silent background process must include a hint field"
assert "notify_on_complete" in hint, (
"Hint must name the corrective flag so the agent can self-correct"
)
assert "silent" in hint.lower() or "no way to learn" in hint.lower(), (
"Hint must explain the failure mode, not just suggest the fix"
)
def test_background_with_notify_does_not_emit_hint(monkeypatch, tmp_path):
"""The correct shape — bg+notify together — must not nag."""
tt = _silent_bg_harness(monkeypatch, tmp_path)
try:
result = json.loads(
tt.terminal_tool(
command="pytest tests/",
background=True,
notify_on_complete=True,
)
)
finally:
tt._active_environments.pop("default", None)
tt._last_activity.pop("default", None)
assert "hint" not in result, (
f"Correct usage must not emit a hint, got: {result.get('hint')!r}"
)
assert result.get("notify_on_complete") is True
def test_background_with_watch_patterns_does_not_emit_hint(monkeypatch, tmp_path):
"""watch_patterns is the other legitimate non-silent shape — also no hint."""
tt = _silent_bg_harness(monkeypatch, tmp_path)
try:
result = json.loads(
tt.terminal_tool(
command="uvicorn app:server --port 8080",
background=True,
watch_patterns=["Application startup complete"],
)
)
finally:
tt._active_environments.pop("default", None)
tt._last_activity.pop("default", None)
assert "hint" not in result, (
f"watch_patterns shape must not emit a silent-process hint, got: {result.get('hint')!r}"
)
def test_foreground_command_does_not_emit_hint(monkeypatch, tmp_path):
"""Hint only applies to background processes — foreground returns its
result synchronously and the agent always sees the outcome."""
tt = _silent_bg_harness(monkeypatch, tmp_path)
# Foreground path doesn't go through spawn_local. Patch the local-env
# exec method to short-circuit to a clean exit so the test doesn't
# actually shell out.
from types import SimpleNamespace
dummy_env = SimpleNamespace(
env={},
execute=lambda *a, **kw: {"output": "done", "exit_code": 0, "error": None},
)
monkeypatch.setitem(tt._active_environments, "default", dummy_env)
try:
result = json.loads(
tt.terminal_tool(
command="echo hello",
background=False,
)
)
finally:
tt._active_environments.pop("default", None)
tt._last_activity.pop("default", None)
assert "hint" not in result, (
f"Foreground commands must not emit the background-silence hint, got: {result.get('hint')!r}"
)

View file

@ -0,0 +1,287 @@
"""Regression tests for PR #6656 — skill uninstall + bundle hash + pairing lock.
Three independent fixes that were salvaged together:
1. ``uninstall_skill`` path traversal: ``install_path`` comes from a JSON
file on disk; a malicious skill could write ``install_path: "../../"``
and trigger ``shutil.rmtree`` against parent directories. Guarded with
``Path.resolve().is_relative_to(SKILLS_DIR.resolve())``.
2. ``bundle_content_hash`` / ``content_hash`` filename inclusion: the
previous hash mixed only file CONTENTS, so swapping ``SKILL.md`` and
``scripts/run.sh`` contents between two paths produced the same digest.
Now both functions prefix each entry with ``rel_path + \\x00`` and
stay symmetric (one on disk, one on in-memory bundle).
3. ``PairingStore.list_pending`` TOCTOU: previously called
``_cleanup_expired`` (which writes the JSON file) without holding
``self._lock``, racing with ``generate_code`` / ``approve_code``.
"""
from __future__ import annotations
import json
from pathlib import Path
from unittest.mock import patch
import pytest
from tools.skills_hub import (
SkillBundle,
bundle_content_hash,
uninstall_skill,
)
from tools.skills_guard import content_hash
# =============================================================================
# uninstall_skill: path traversal guard
# =============================================================================
class TestUninstallPathTraversal:
"""The ``install_path`` field in ``lock.json`` is attacker-controllable
if a malicious skill is ever installed (or if the hub's lockfile is
corrupted). The uninstall path must refuse anything that resolves
outside ``SKILLS_DIR``.
"""
@pytest.fixture
def hub_setup(self, tmp_path, monkeypatch):
"""Build a hub directory tree with a malicious lock.json entry.
``HubLockFile`` binds its default ``path`` argument at def time
against the module-level ``LOCK_FILE`` constant, so monkey-patching
``LOCK_FILE`` alone is not enough we also need to rebind the
function default. Patching ``HubLockFile.__init__.__defaults__``
is the standard tool for this.
"""
import tools.skills_hub as hub
skills_dir = tmp_path / "skills"
hub_dir = skills_dir / ".hub"
hub_dir.mkdir(parents=True)
lock_path = hub_dir / "lock.json"
monkeypatch.setattr(hub, "SKILLS_DIR", skills_dir)
monkeypatch.setattr(hub, "HUB_DIR", hub_dir)
monkeypatch.setattr(hub, "LOCK_FILE", lock_path)
monkeypatch.setattr(hub, "AUDIT_LOG", hub_dir / "audit.log")
# Rebind HubLockFile.__init__'s default `path=` arg so
# `HubLockFile()` (no args) picks up the new lock path.
monkeypatch.setattr(
hub.HubLockFile.__init__,
"__defaults__",
(lock_path,),
)
# A real directory outside skills_dir that the traversal would
# delete if the guard fails.
victim = tmp_path / "do-not-delete"
victim.mkdir()
(victim / "important.txt").write_text("data")
return skills_dir, hub_dir, victim
def _write_lock(self, hub_dir: Path, entries: dict) -> None:
lock_path = hub_dir / "lock.json"
lock_path.write_text(json.dumps({"version": 1, "installed": entries}))
def test_traversal_via_parent_segments_rejected(self, hub_setup):
"""install_path: "../do-not-delete" must NOT escape SKILLS_DIR."""
skills_dir, hub_dir, victim = hub_setup
self._write_lock(hub_dir, {
"evil": {
"install_path": "../do-not-delete",
"source": "https://example.com",
"version": "1.0",
},
})
ok, msg = uninstall_skill("evil")
assert ok is False
assert "outside" in msg or "resolves" in msg or "skills directory" in msg
# The victim directory MUST still exist.
assert victim.exists()
assert (victim / "important.txt").exists()
def test_absolute_path_rejected(self, hub_setup):
"""install_path that's an absolute path outside SKILLS_DIR must be refused."""
skills_dir, hub_dir, victim = hub_setup
self._write_lock(hub_dir, {
"evil": {
"install_path": str(victim),
"source": "https://example.com",
"version": "1.0",
},
})
ok, msg = uninstall_skill("evil")
# SKILLS_DIR / "<absolute>" still results in an absolute path,
# which when resolved is outside skills_dir. Must be refused.
assert ok is False
assert victim.exists()
def test_symlink_escape_rejected(self, tmp_path, hub_setup):
"""Symlinks inside SKILLS_DIR that point outside must be refused
after realpath resolution."""
skills_dir, hub_dir, victim = hub_setup
# Create a "skill" that's actually a symlink to victim
evil_link = skills_dir / "trapdoor"
evil_link.symlink_to(victim)
self._write_lock(hub_dir, {
"trap": {
"install_path": "trapdoor",
"source": "https://example.com",
"version": "1.0",
},
})
ok, msg = uninstall_skill("trap")
# realpath resolves the symlink → outside skills_dir → refused.
assert ok is False
assert victim.exists()
assert (victim / "important.txt").exists()
def test_legitimate_skill_uninstall_still_works(self, hub_setup):
"""The guard must NOT block a normal skill directory inside SKILLS_DIR."""
skills_dir, hub_dir, _victim = hub_setup
legit = skills_dir / "category" / "my-skill"
legit.mkdir(parents=True)
(legit / "SKILL.md").write_text("test")
self._write_lock(hub_dir, {
"my-skill": {
"install_path": "category/my-skill",
"source": "https://example.com",
"trust_level": "community",
"version": "1.0",
},
})
ok, msg = uninstall_skill("my-skill")
assert ok is True
assert not legit.exists()
# =============================================================================
# Bundle / disk hash symmetry + filename inclusion
# =============================================================================
class TestBundleHashFilenameSensitivity:
"""Hashes must change when filenames are swapped, even if combined
contents stay identical. ``bundle_content_hash`` (in-memory) and
``content_hash`` (on-disk) must stay symmetric they're used to
detect skill drift between an installed bundle and its source.
"""
def _make_bundle(self, files: dict) -> SkillBundle:
return SkillBundle(
name="test",
files=files,
source="test",
identifier="test/test",
trust_level="community",
)
def test_filename_swap_changes_hash(self):
"""Swapping content between SKILL.md and scripts/run.sh must
produce a different hash. Without the filename in the hash,
these two bundles would have looked identical."""
a = self._make_bundle({"SKILL.md": "hello", "scripts/run.sh": "world"})
b = self._make_bundle({"SKILL.md": "world", "scripts/run.sh": "hello"})
assert bundle_content_hash(a) != bundle_content_hash(b)
def test_identical_bundles_same_hash(self):
"""Sanity: equal content + paths = equal hash."""
a = self._make_bundle({"SKILL.md": "x", "run.sh": "y"})
b = self._make_bundle({"SKILL.md": "x", "run.sh": "y"})
assert bundle_content_hash(a) == bundle_content_hash(b)
def test_disk_hash_changes_on_filename_swap(self, tmp_path):
"""``content_hash`` on disk must also be filename-sensitive,
so it stays symmetric with ``bundle_content_hash``."""
skill_a = tmp_path / "a"
skill_a.mkdir()
(skill_a / "SKILL.md").write_text("hello")
(skill_a / "run.sh").write_text("world")
skill_b = tmp_path / "b"
skill_b.mkdir()
(skill_b / "SKILL.md").write_text("world")
(skill_b / "run.sh").write_text("hello")
# Different filename↔content mappings = different hashes.
assert content_hash(skill_a) != content_hash(skill_b)
def test_bundle_and_disk_hash_match(self, tmp_path):
"""Symmetry contract: the same skill, expressed as a SkillBundle
and as a directory tree, must produce the same digest. If this
fails, ``check_for_skill_updates`` will flag every clean
install as drifted."""
skill_dir = tmp_path / "skill"
skill_dir.mkdir()
(skill_dir / "SKILL.md").write_text("hello")
(skill_dir / "scripts").mkdir()
(skill_dir / "scripts" / "run.sh").write_text("world")
bundle = self._make_bundle({
"SKILL.md": "hello",
"scripts/run.sh": "world",
})
assert bundle_content_hash(bundle) == content_hash(skill_dir)
# =============================================================================
# PairingStore.list_pending: must hold the lock
# =============================================================================
class TestListPendingLock:
"""list_pending writes via _cleanup_expired. Without the lock,
a concurrent generate_code or approve_code can race against the
write, potentially clobbering a pending approval."""
def test_list_pending_acquires_lock(self, tmp_path):
"""Source-grep contract: ``list_pending`` body must be wrapped
in ``with self._lock:``. If anyone unwraps it again, the TOCTOU
bug returns."""
import gateway.pairing as _pairing_mod
source = Path(_pairing_mod.__file__).read_text(encoding="utf-8")
# Find the list_pending function body and assert the lock
# context manager appears inside it. We grep the function
# source rather than runtime-introspect because the racy
# behaviour is hard to deterministically reproduce in a test.
lines = source.splitlines()
in_func = False
seen_lock = False
for line in lines:
if line.startswith(" def list_pending("):
in_func = True
continue
if in_func:
if line.startswith(" def "):
break # next function
if "with self._lock:" in line:
seen_lock = True
break
assert seen_lock, (
"list_pending must wrap its body in `with self._lock:` — "
"without it, _cleanup_expired's file write races with "
"concurrent generate_code/approve_code."
)
def test_list_pending_returns_correct_data(self, tmp_path):
"""End-to-end smoke: even with the lock held, basic operation works."""
from gateway.pairing import PairingStore
with patch("gateway.pairing.PAIRING_DIR", tmp_path):
store = PairingStore()
store.generate_code("telegram", "user1", "Alice")
pending = store.list_pending("telegram")
assert len(pending) == 1
assert pending[0]["user_id"] == "user1"

View file

@ -1007,3 +1007,163 @@ def test_drain_notifications_empty_queue():
results = process_registry.drain_notifications()
assert results == []
# ---------------------------------------------------------------------------
# _terminate_host_pid — cross-platform process-tree termination
# ---------------------------------------------------------------------------
class TestTerminateHostPidWindows:
"""Windows branch uses ``taskkill /T /F`` — the documented MS tree-kill
primitive. We can't use psutil's ``children(recursive=True)`` /
``.terminate()`` path on Windows because (1) Windows doesn't maintain
a Unix-style process tree so the walk is unreliable, and (2)
``Process.terminate()`` on Windows is ``TerminateProcess()`` for the
target handle only, not the tree.
"""
def test_windows_invokes_taskkill_with_tree_and_force_flags(self, monkeypatch):
"""The Windows branch must shell out to ``taskkill /PID N /T /F``."""
from tools import process_registry as pr
captured = {}
def fake_run(args, **kwargs):
captured["args"] = args
captured["kwargs"] = kwargs
return MagicMock(returncode=0, stderr="", stdout="")
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
monkeypatch.setattr(pr.subprocess, "run", fake_run)
pr.ProcessRegistry._terminate_host_pid(12345)
assert captured["args"][0] == "taskkill"
assert "/PID" in captured["args"]
assert "12345" in captured["args"]
assert "/T" in captured["args"], "Tree flag required to reach descendants"
assert "/F" in captured["args"], "Force flag required for headless Chromium"
def test_windows_falls_back_to_os_kill_when_taskkill_missing(self, monkeypatch):
"""If ``taskkill.exe`` is somehow unavailable, fall back to a bare
``os.kill(pid, SIGTERM)`` so we at least try to kill the parent."""
from tools import process_registry as pr
kill_calls = []
def fake_run(*args, **kwargs):
raise FileNotFoundError("taskkill not found")
def fake_kill(pid, sig):
kill_calls.append((pid, sig))
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
monkeypatch.setattr(pr.subprocess, "run", fake_run)
monkeypatch.setattr(pr.os, "kill", fake_kill)
pr.ProcessRegistry._terminate_host_pid(12345)
assert kill_calls == [(12345, signal.SIGTERM)]
def test_windows_does_not_call_psutil(self, monkeypatch):
"""The Windows branch must NOT exercise the psutil tree-walk
(it's unreliable on Windows — see the function docstring)."""
from tools import process_registry as pr
import psutil
psutil_calls = []
class _BoomProcess:
def __init__(self, pid):
psutil_calls.append(("Process", pid))
def children(self, recursive=False):
psutil_calls.append(("children", recursive))
return []
def terminate(self):
psutil_calls.append(("terminate",))
def fake_run(args, **kwargs):
return MagicMock(returncode=0, stderr="", stdout="")
monkeypatch.setattr(pr, "_IS_WINDOWS", True)
monkeypatch.setattr(pr.subprocess, "run", fake_run)
monkeypatch.setattr(psutil, "Process", _BoomProcess)
pr.ProcessRegistry._terminate_host_pid(12345)
assert psutil_calls == [], (
f"Windows branch must not touch psutil, but saw {psutil_calls!r}"
)
class TestTerminateHostPidPosix:
"""POSIX branch walks the tree via psutil and SIGTERMs children first."""
def test_posix_walks_tree_and_terminates_children_then_parent(self, monkeypatch):
from tools import process_registry as pr
import psutil
terminate_order = []
class _FakeChild:
def __init__(self, pid):
self.pid = pid
def terminate(self):
terminate_order.append(self.pid)
class _FakeParent:
def __init__(self, pid):
self.pid = pid
def children(self, recursive=False):
assert recursive is True
return [_FakeChild(101), _FakeChild(102), _FakeChild(103)]
def terminate(self):
terminate_order.append(self.pid)
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
monkeypatch.setattr(psutil, "Process", _FakeParent)
pr.ProcessRegistry._terminate_host_pid(12345)
assert terminate_order == [101, 102, 103, 12345], (
"Children must be terminated before the parent"
)
def test_posix_no_such_process_swallowed(self, monkeypatch):
from tools import process_registry as pr
import psutil
def boom(pid):
raise psutil.NoSuchProcess(pid)
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
monkeypatch.setattr(psutil, "Process", boom)
# Must not raise.
pr.ProcessRegistry._terminate_host_pid(999999999)
def test_posix_oserror_falls_back_to_os_kill(self, monkeypatch):
from tools import process_registry as pr
import psutil
def boom(pid):
raise PermissionError("can't read /proc")
kill_calls = []
def fake_kill(pid, sig):
kill_calls.append((pid, sig))
monkeypatch.setattr(pr, "_IS_WINDOWS", False)
monkeypatch.setattr(psutil, "Process", boom)
monkeypatch.setattr(pr.os, "kill", fake_kill)
pr.ProcessRegistry._terminate_host_pid(12345)
assert kill_calls == [(12345, signal.SIGTERM)]

View file

@ -28,16 +28,93 @@ def _reset_signal_scheduler():
from gateway.config import Platform
from tools.send_message_tool import (
_derive_forum_thread_name,
_is_telegram_thread_not_found,
_parse_target_ref,
_send_discord,
_send_matrix_via_adapter,
_send_signal,
_send_telegram,
_send_to_platform,
send_message_tool,
)
# Discord helpers moved to the plugin in #24325. Import from the new path
# and provide a thin ``_send_discord(token, ...)`` shim that mirrors the
# pre-migration signature so the existing test bodies keep working.
from plugins.platforms.discord.adapter import (
_DISCORD_CHANNEL_TYPE_PROBE_CACHE,
_derive_forum_thread_name,
_probe_is_forum_cached,
_remember_channel_is_forum,
_standalone_send,
)
async def _send_discord(
token,
chat_id,
message,
*,
thread_id=None,
media_files=None,
):
"""Pre-migration ``(token, chat_id, message, …)`` adapter around the
plugin's ``_standalone_send(pconfig, …)``. Lets test bodies continue
to call ``_send_discord("tok", ...)`` without rewriting every signature.
"""
pconfig = SimpleNamespace(token=token, extra={})
return await _standalone_send(
pconfig,
chat_id,
message,
thread_id=thread_id,
media_files=media_files,
)
def _discord_entry():
"""Return the live Discord PlatformEntry, importing lazily so plugin
discovery is forced exactly once and patches survive across tests."""
from hermes_cli.plugins import discover_plugins
from gateway.platform_registry import platform_registry
discover_plugins()
return platform_registry.get("discord")
class _patch_discord_sender:
"""Patch the Discord registry entry's ``standalone_sender_fn`` with the
given mock and translate the production ``(pconfig, ...)`` call shape
back to the pre-migration ``(token, ...)`` shape the test mocks expect.
Use as a context manager:
send_mock = AsyncMock(return_value={...})
with _patch_discord_sender(send_mock):
asyncio.run(_send_to_platform(Platform.DISCORD, ...))
send_mock.assert_awaited_once_with("tok", "chat", "msg",
thread_id=None, media_files=[])
"""
def __init__(self, mock):
self._mock = mock
self._entry = None
self._original = None
async def _adapter(self, pconfig, chat_id, message, *, thread_id=None, media_files=None):
token = getattr(pconfig, "token", None)
return await self._mock(
token, chat_id, message,
thread_id=thread_id, media_files=media_files,
)
def __enter__(self):
self._entry = _discord_entry()
self._original = self._entry.standalone_sender_fn
self._entry.standalone_sender_fn = self._adapter
return self._mock
def __exit__(self, exc_type, exc, tb):
if self._entry is not None:
self._entry.standalone_sender_fn = self._original
return False
def _run_async_immediately(coro):
@ -300,6 +377,37 @@ class TestSendMessageTool:
user_id="user-123",
)
def test_media_tag_outside_allowed_roots_is_not_sent(self, tmp_path):
config, telegram_cfg = _make_config()
secret = tmp_path / "secret.pdf"
secret.write_bytes(b"%PDF secret")
with patch("gateway.config.load_gateway_config", return_value=config), \
patch("tools.interrupt.is_interrupted", return_value=False), \
patch("model_tools._run_async", side_effect=_run_async_immediately), \
patch("tools.send_message_tool._send_to_platform", new=AsyncMock(return_value={"success": True})) as send_mock, \
patch("gateway.mirror.mirror_to_session", return_value=True):
result = json.loads(
send_message_tool(
{
"action": "send",
"target": "telegram:12345",
"message": f"hello\nMEDIA:{secret}",
}
)
)
assert result["success"] is True
send_mock.assert_awaited_once_with(
Platform.TELEGRAM,
telegram_cfg,
"12345",
"hello",
thread_id=None,
media_files=[],
force_document=False,
)
def test_top_level_send_failure_redacts_query_token(self):
config, _telegram_cfg = _make_config()
leaked = "very-secret-query-token-123456"
@ -446,7 +554,7 @@ class TestSendToPlatformChunking:
"""Messages exceeding the platform limit are split into multiple sends."""
send = AsyncMock(return_value={"success": True, "message_id": "1"})
long_msg = "word " * 1000 # ~5000 chars, well over Discord's 2000 limit
with patch("tools.send_message_tool._send_discord", send):
with _patch_discord_sender(send):
result = asyncio.run(
_send_to_platform(
Platform.DISCORD,
@ -1176,7 +1284,7 @@ class TestSendToPlatformDiscordThread:
"""Discord platform with thread_id passes it to _send_discord."""
send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})
with patch("tools.send_message_tool._send_discord", send_mock):
with _patch_discord_sender(send_mock):
result = asyncio.run(
_send_to_platform(
Platform.DISCORD,
@ -1196,7 +1304,7 @@ class TestSendToPlatformDiscordThread:
"""Discord platform without thread_id passes None."""
send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})
with patch("tools.send_message_tool._send_discord", send_mock):
with _patch_discord_sender(send_mock):
result = asyncio.run(
_send_to_platform(
Platform.DISCORD,
@ -1360,7 +1468,7 @@ class TestSendToPlatformDiscordMedia:
# A message long enough to get chunked (Discord limit is 2000)
long_msg = "A" * 1900 + " " + "B" * 1900
with patch("tools.send_message_tool._send_discord", side_effect=mock_send_discord):
with _patch_discord_sender(AsyncMock(side_effect=mock_send_discord)):
result = asyncio.run(
_send_to_platform(
Platform.DISCORD,
@ -1380,7 +1488,7 @@ class TestSendToPlatformDiscordMedia:
"""Short message (single chunk) gets media_files directly."""
send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})
with patch("tools.send_message_tool._send_discord", send_mock):
with _patch_discord_sender(send_mock):
result = asyncio.run(
_send_to_platform(
Platform.DISCORD,
@ -1618,7 +1726,7 @@ class TestSendToPlatformDiscordForum:
"""Discord messages are routed through _send_discord, which handles forum detection."""
send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})
with patch("tools.send_message_tool._send_discord", send_mock):
with _patch_discord_sender(send_mock):
result = asyncio.run(
_send_to_platform(
Platform.DISCORD,
@ -1637,7 +1745,7 @@ class TestSendToPlatformDiscordForum:
"""Thread ID is still passed through when sending to Discord."""
send_mock = AsyncMock(return_value={"success": True, "message_id": "1"})
with patch("tools.send_message_tool._send_discord", send_mock):
with _patch_discord_sender(send_mock):
result = asyncio.run(
_send_to_platform(
Platform.DISCORD,
@ -1775,11 +1883,11 @@ class TestForumProbeCache:
"""_DISCORD_CHANNEL_TYPE_PROBE_CACHE memoizes forum detection results."""
def setup_method(self):
from tools import send_message_tool as smt
smt._DISCORD_CHANNEL_TYPE_PROBE_CACHE.clear()
from plugins.platforms.discord import adapter as discord_adapter
discord_adapter._DISCORD_CHANNEL_TYPE_PROBE_CACHE.clear()
def test_cache_round_trip(self):
from tools.send_message_tool import (
from plugins.platforms.discord.adapter import (
_probe_is_forum_cached,
_remember_channel_is_forum,
)
@ -1819,7 +1927,7 @@ class TestForumProbeCache:
thread_session.post = MagicMock(return_value=thread_resp)
# Two _send_discord calls: first does probe + thread-create; second should skip probe
from tools import send_message_tool as smt
from plugins.platforms.discord import adapter as discord_adapter
sessions_created = []
@ -1837,7 +1945,7 @@ class TestForumProbeCache:
with patch("aiohttp.ClientSession", side_effect=session_factory):
result1 = asyncio.run(_send_discord("tok", "ch1", "first"))
assert result1["success"] is True
assert smt._probe_is_forum_cached("ch1") is True
assert discord_adapter._probe_is_forum_cached("ch1") is True
# Second call: cache hits, no new probe session needed. We need to only
# return thread_session now since probe is skipped.
@ -2575,4 +2683,3 @@ class TestSendTelegramThreadNotFoundRetry:
finally:
if media_path and os.path.exists(media_path):
os.unlink(media_path)

View file

@ -0,0 +1,103 @@
"""Tests for tools.skills_ast_audit — opt-in AST diagnostic scanner."""
import sys
from pathlib import Path
from tools.skills_ast_audit import ast_scan_path, format_ast_report
def _pids(findings):
return [pid for (_f, _l, pid, _d) in findings]
def test_bypass_payload_detected(tmp_path):
"""The exact bypass shape from #7072 is caught."""
f = tmp_path / "exfil.py"
f.write_text(
"import importlib\n"
"parts = ['o', 's']\n"
"m = importlib.import_module(''.join(parts))\n"
"e = m.__dict__[''.join(['e','n','v'])]\n"
)
pids = _pids(ast_scan_path(f))
assert "dynamic_import" in pids
assert "importlib_import" in pids
assert "dict_access" in pids
def test_syntax_error_does_not_crash(tmp_path):
f = tmp_path / "bad.py"
f.write_text("def broken(\n")
assert ast_scan_path(f) == []
def test_recursion_error_does_not_crash(tmp_path):
f = tmp_path / "deep.py"
f.write_text("a" + ".x" * 5000 + "\n")
orig = sys.getrecursionlimit()
sys.setrecursionlimit(200)
try:
result = ast_scan_path(f)
finally:
sys.setrecursionlimit(orig)
assert isinstance(result, list)
def test_importer_lookalike_not_flagged(tmp_path):
"""`import importer` must NOT match — dot-bounded prefix."""
f = tmp_path / "ok.py"
f.write_text("import importer\nfrom importer import x\n")
assert _pids(ast_scan_path(f)) == []
def test_literal_dunder_import_not_flagged(tmp_path):
"""__import__('os') with a literal is not flagged (regex catches those)."""
f = tmp_path / "ok.py"
f.write_text("m = __import__('os')\n")
assert "dynamic_import_computed" not in _pids(ast_scan_path(f))
def test_non_python_file_returns_empty(tmp_path):
f = tmp_path / "script.sh"
f.write_text("import importlib\n")
assert ast_scan_path(f) == []
def test_directory_scans_recursively_and_skips_cache_dirs(tmp_path):
skill = tmp_path / "s"
skill.mkdir()
(skill / "main.py").write_text("import importlib\n")
(skill / "sub").mkdir()
(skill / "sub" / "u.py").write_text("from importlib.util import find_spec\n")
for d in ("__pycache__", ".venv", "venv", "node_modules"):
ignored = skill / d
ignored.mkdir()
(ignored / "junk.py").write_text("import importlib\n")
pids = _pids(ast_scan_path(skill))
assert pids.count("importlib_import") == 2
def test_missing_path_returns_empty(tmp_path):
assert ast_scan_path(tmp_path / "does_not_exist") == []
def test_dynamic_getattr_and_dict_access_detected(tmp_path):
f = tmp_path / "g.py"
f.write_text("name = 'x'\nv = getattr(o, name)\nv = o.__dict__[name]\n")
pids = _pids(ast_scan_path(f))
assert "dynamic_getattr" in pids
assert "dict_access" in pids
def test_format_report_empty():
assert "No dynamic" in format_ast_report([])
def test_format_report_with_findings():
findings = [
("a.py", 1, "importlib_import", "import importlib — ..."),
("a.py", 3, "dynamic_import", "importlib.import_module() — ..."),
]
out = format_ast_report(findings, skill_name="test")
assert "test" in out and "a.py" in out and "L1" in out and "L3" in out
assert "diagnostic hints" in out

View file

@ -84,13 +84,13 @@ class TestDetermineVerdict:
f = Finding("x", "high", "network", "f.py", 1, "m", "d")
assert _determine_verdict([f]) == "caution"
def test_medium_finding_caution(self):
def test_medium_finding_safe(self):
f = Finding("x", "medium", "structural", "f.py", 1, "m", "d")
assert _determine_verdict([f]) == "caution"
assert _determine_verdict([f]) == "safe"
def test_low_finding_caution(self):
def test_low_finding_safe(self):
f = Finding("x", "low", "obfuscation", "f.py", 1, "m", "d")
assert _determine_verdict([f]) == "caution"
assert _determine_verdict([f]) == "safe"
# ---------------------------------------------------------------------------
@ -145,21 +145,46 @@ class TestShouldAllowInstall:
allowed, _ = should_allow_install(self._result("community", "dangerous", f), force=False)
assert allowed is False
def test_force_overrides_dangerous_for_community(self):
def test_force_does_not_override_dangerous_for_community(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, reason = should_allow_install(
self._result("community", "dangerous", f), force=True
)
assert allowed is True
assert "Force-installed" in reason
assert allowed is False
assert "Blocked" in reason
# Error message MUST explain why --force didn't work, not invite a retry.
assert "does not override" in reason
assert "Use --force to override" not in reason
def test_force_overrides_dangerous_for_trusted(self):
def test_force_does_not_override_dangerous_for_trusted_message(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, reason = should_allow_install(
self._result("trusted", "dangerous", f), force=True
)
assert allowed is True
assert "Force-installed" in reason
assert allowed is False
assert "does not override" in reason
assert "Use --force to override" not in reason
def test_non_dangerous_block_keeps_force_hint(self):
# When --force CAN override the block, the error message must still
# point to it. Use builtin trust + dangerous to land in the block
# branch without triggering the dangerous-specific message.
f = [Finding("x", "high", "network", "f", 1, "m", "d")]
# Construct a path where decision == block but verdict != dangerous.
# community + caution = block per current INSTALL_POLICY.
allowed, reason = should_allow_install(
self._result("community", "caution", f), force=False
)
assert allowed is False
assert "Use --force to override" in reason
def test_force_does_not_override_dangerous_for_trusted(self):
f = [Finding("x", "critical", "c", "f", 1, "m", "d")]
allowed, reason = should_allow_install(
self._result("trusted", "dangerous", f), force=True
)
assert allowed is False
assert "Blocked" in reason
# -- agent-created policy --

View file

@ -91,7 +91,7 @@ class TestSSHBulkUpload:
assert "/home/testuser/.hermes/credentials" in mkdir_str
def test_staging_symlinks_mirror_remote_layout(self, mock_env, tmp_path):
"""Symlinks in staging dir should mirror the remote path structure."""
"""Symlinks in staging dir should mirror the .hermes-relative layout."""
f1 = tmp_path / "local_a.txt"
f1.write_text("content a")
@ -107,9 +107,7 @@ class TestSSHBulkUpload:
c_idx = cmd.index("-C")
staging_dir = cmd[c_idx + 1]
# Check the symlink exists
expected = os.path.join(
staging_dir, "home/testuser/.hermes/skills/my_skill.md"
)
expected = os.path.join(staging_dir, "skills/my_skill.md")
staging_paths.append(expected)
assert os.path.islink(expected), f"Expected symlink at {expected}"
assert os.readlink(expected) == os.path.abspath(str(f1))
@ -166,14 +164,42 @@ class TestSSHBulkUpload:
assert "-" in tar_cmd # stdout
assert "-C" in tar_cmd
# ssh: extract from stdin at /, preserving existing dir modes (#17767)
# ssh: extract from stdin at ~/.hermes, preserving existing dir modes (#17767)
ssh_str = " ".join(ssh_cmd)
assert "ssh" in ssh_str
assert "tar xf -" in ssh_str
assert "--no-overwrite-dir" in ssh_str
assert "-C /" in ssh_str
assert "-C /home/testuser/.hermes" in ssh_str
assert "testuser@example.com" in ssh_str
def test_bulk_upload_never_stages_remote_home_prefix(self, mock_env, tmp_path):
"""Regression: do not archive /home/<user> path components."""
f1 = tmp_path / "nested.txt"
f1.write_text("nested")
files = [(str(f1), "/home/testuser/.hermes/cache/nested.txt")]
def capture_tar_cmd(cmd, **kwargs):
if cmd[0] == "tar":
c_idx = cmd.index("-C")
staging_dir = cmd[c_idx + 1]
assert not os.path.exists(os.path.join(staging_dir, "home"))
expected = os.path.join(staging_dir, "cache/nested.txt")
assert os.path.islink(expected)
mock = MagicMock()
mock.stdout = MagicMock()
mock.returncode = 0
mock.poll.return_value = 0
mock.communicate.return_value = (b"", b"")
mock.stderr = MagicMock()
mock.stderr.read.return_value = b""
return mock
with patch.object(subprocess, "run",
return_value=subprocess.CompletedProcess([], 0)), \
patch.object(subprocess, "Popen", side_effect=capture_tar_cmd):
mock_env._ssh_bulk_upload(files)
def test_mkdir_failure_raises(self, mock_env, tmp_path):
"""mkdir failure should raise RuntimeError before tar pipe."""
f1 = tmp_path / "y.txt"

View file

@ -831,7 +831,8 @@ class TestDiskFailureMarker:
with patch("tools.tirith_security._failure_marker_path", return_value=marker):
from tools.tirith_security import _mark_install_failed, _is_install_failed_on_disk
_mark_install_failed("cosign_missing")
assert _is_install_failed_on_disk() # cosign still absent
with patch("tools.tirith_security.shutil.which", return_value=None):
assert _is_install_failed_on_disk() # cosign still absent
# Now cosign appears on PATH
with patch("tools.tirith_security.shutil.which", return_value="/usr/local/bin/cosign"):

View file

@ -23,6 +23,9 @@ def _fake_faster_whisper_module(mock_model):
# ---------------------------------------------------------------------------
pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
@pytest.fixture(autouse=True)
def _clear_openai_env(monkeypatch):
monkeypatch.delenv("OPENAI_API_KEY", raising=False)

View file

@ -12,6 +12,9 @@ from unittest.mock import MagicMock, patch
import pytest
pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
@pytest.fixture(autouse=True)
def isolate_env(monkeypatch):
"""Strip every STT-related env var so the test really exercises the

View file

@ -42,6 +42,9 @@ def sample_ogg(tmp_path):
return str(ogg_path)
pytestmark = pytest.mark.usefixtures("disable_lazy_stt_install")
@pytest.fixture(autouse=True)
def clean_env(monkeypatch):
"""Ensure no real API keys leak into tests."""

View file

@ -10,6 +10,18 @@ from unittest.mock import MagicMock, patch
import pytest
def _non_wsl_proc_version(real_open):
"""Return an open() shim that makes host WSL detection deterministic."""
def _fake_open(file, *args, **kwargs):
if file == "/proc/version":
from io import StringIO
return StringIO("Linux test-kernel")
return real_open(file, *args, **kwargs)
return _fake_open
# ============================================================================
# Fixtures
# ============================================================================
@ -68,6 +80,7 @@ class TestDetectAudioEnvironment:
monkeypatch.delenv("SSH_CONNECTION", raising=False)
monkeypatch.setattr("tools.voice_mode._import_audio",
lambda: (MagicMock(), MagicMock()))
monkeypatch.setattr("builtins.open", _non_wsl_proc_version(open))
from tools.voice_mode import detect_audio_environment
result = detect_audio_environment()
@ -225,6 +238,7 @@ class TestDetectAudioEnvironment:
monkeypatch.setattr("tools.voice_mode.shutil.which", lambda cmd: "/data/data/com.termux/files/usr/bin/termux-microphone-record" if cmd == "termux-microphone-record" else None)
monkeypatch.setattr("tools.voice_mode._termux_api_app_installed", lambda: True)
monkeypatch.setattr("tools.voice_mode._import_audio", lambda: (_ for _ in ()).throw(ImportError("no audio libs")))
monkeypatch.setattr("builtins.open", _non_wsl_proc_version(open))
from tools.voice_mode import detect_audio_environment
result = detect_audio_environment()