fix(tests): resolve 17 persistent CI test failures (#15084)

Make the main-branch test suite pass again. Most failures were tests
still asserting old shapes after recent refactors; two were real source
bugs.

Source fixes:
- tools/mcp_tool.py: _kill_orphaned_mcp_children() slept 2s on every
  shutdown even when no tracked PIDs existed, making test_shutdown_is_parallel
  measure ~3s for 3 parallel 1s shutdowns. Early-return when pids is empty.
- hermes_cli/tips.py: tip 105 was 157 chars; corpus max is 150.

Test fixes (mostly stale mock targets / missing fixture fields):
- test_zombie_process_cleanup, test_agent_cache: patch run_agent.cleanup_vm
  (the local name bound at import), not tools.terminal_tool.cleanup_vm.
- test_browser_camofox: patch tools.browser_camofox.load_config, not
  hermes_cli.config.load_config (the source module, not the resolved one).
- test_flush_memories_codex._chat_response_with_memory_call: add
  finish_reason, tool_call.id, tool_call.type so the chat_completions
  transport normalizer doesn't AttributeError.
- test_concurrent_interrupt: polling_tool signature now accepts
  messages= kwarg that _invoke_tool() passes through.
- test_minimax_provider: add _fallback_chain=[] to the __new__'d agent
  so switch_model() doesn't AttributeError.
- test_skills_config: SKILLS_DIR MagicMock + .rglob stopped working
  after the scanner switched to agent.skill_utils.iter_skill_index_files
  (os.walk-based). Point SKILLS_DIR at a real tmp_path and patch
  agent.skill_utils.get_external_skills_dirs.
- test_browser_cdp_tool: browser_cdp toolset was intentionally split into
  'browser-cdp' (commit 96b0f3700) so its stricter check_fn doesn't gate
  the whole browser toolset; test now expects 'browser-cdp'.
- test_registry: add tools.browser_dialog_tool to the expected
  builtin-discovery set (PR #14540 added it).
- test_file_tools TestPatchHints: patch_tool surfaces hints as a '_hint'
  key on the JSON payload, not inline '[Hint: ...' text.
- test_write_deny test_hermes_env: resolve .env via get_hermes_home() so
  the path matches the profile-aware denylist under hermetic HERMES_HOME.
- test_checkpoint_manager test_falls_back_to_parent: guard the walk-up
  so a stray /tmp/pyproject.toml on the host doesn't pick up /tmp as the
  project root.
- test_quick_commands: set cli.session_id in the __new__'d CLI so the
  alias-args path doesn't trip AttributeError when fuzzy-matching leaks
  a skill command across xdist test distribution.
This commit is contained in:
Teknium 2026-04-24 03:46:46 -07:00 committed by GitHub
parent 1f9c368622
commit 18f3fc8a6f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 83 additions and 29 deletions

View file

@ -127,7 +127,7 @@ TIPS = [
# --- Tools & Capabilities --- # --- Tools & Capabilities ---
"execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.", "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.",
"delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.", "delegate_task spawns up to 3 concurrent sub-agents by default (delegation.max_concurrent_children) with isolated contexts for parallel work.",
"web_extract works on PDF URLs — pass any PDF link and it converts to markdown.", "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.",
"search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.", "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.",
"patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.", "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.",

View file

@ -341,6 +341,7 @@ class TestMinimaxSwitchModelCredentialGuard:
agent._client_kwargs = {} agent._client_kwargs = {}
agent.client = None agent.client = None
agent._anthropic_client = MagicMock() agent._anthropic_client = MagicMock()
agent._fallback_chain = []
with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \
patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-leaked") as mock_resolve, \ patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-leaked") as mock_resolve, \

View file

@ -23,6 +23,11 @@ class TestCLIQuickCommands:
cli.console = MagicMock() cli.console = MagicMock()
cli.agent = None cli.agent = None
cli.conversation_history = [] cli.conversation_history = []
# session_id is accessed by the fallback skill/fuzzy-match path in
# process_command; without it, tests that exercise `/alias args`
# can trip an AttributeError when cross-test state leaks a skill
# command matching the alias target.
cli.session_id = "test-session"
return cli return cli
def test_exec_command_runs_and_prints_output(self): def test_exec_command_runs_and_prints_output(self):

View file

@ -950,7 +950,7 @@ class TestAgentCacheIdleResume:
release_clients() (soft session may resume). release_clients() (soft session may resume).
""" """
from run_agent import AIAgent from run_agent import AIAgent
from tools import terminal_tool as _tt import run_agent as _ra
# Agent A: evicted from cache (soft) — terminal survives. # Agent A: evicted from cache (soft) — terminal survives.
# Agent B: session expired (hard) — terminal torn down. # Agent B: session expired (hard) — terminal torn down.
@ -970,13 +970,16 @@ class TestAgentCacheIdleResume:
) )
vm_calls: list = [] vm_calls: list = []
original_vm = _tt.cleanup_vm # AIAgent.close() calls the ``cleanup_vm`` name bound into
_tt.cleanup_vm = lambda tid: vm_calls.append(tid) # ``run_agent`` at import time, not ``tools.terminal_tool.cleanup_vm``
# directly — so patch the ``run_agent`` reference.
original_vm = _ra.cleanup_vm
_ra.cleanup_vm = lambda tid: vm_calls.append(tid)
try: try:
agent_a.release_clients() # cache eviction agent_a.release_clients() # cache eviction
agent_b.close() # session expiry agent_b.close() # session expiry
finally: finally:
_tt.cleanup_vm = original_vm _ra.cleanup_vm = original_vm
try: try:
agent_a.close() agent_a.close()
except Exception: except Exception:

View file

@ -251,43 +251,48 @@ class TestGetDisabledSkillNames:
class TestFindAllSkillsFiltering: class TestFindAllSkillsFiltering:
@patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"}) @patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"})
@patch("tools.skills_tool.skill_matches_platform", return_value=True) @patch("tools.skills_tool.skill_matches_platform", return_value=True)
@patch("tools.skills_tool.SKILLS_DIR") def test_disabled_skill_excluded(self, mock_platform, mock_disabled, tmp_path, monkeypatch):
def test_disabled_skill_excluded(self, mock_dir, mock_platform, mock_disabled, tmp_path):
skill_dir = tmp_path / "my-skill" skill_dir = tmp_path / "my-skill"
skill_dir.mkdir() skill_dir.mkdir()
skill_md = skill_dir / "SKILL.md" skill_md = skill_dir / "SKILL.md"
skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent") skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
mock_dir.exists.return_value = True # Point SKILLS_DIR at the real tempdir so iter_skill_index_files
mock_dir.rglob.return_value = [skill_md] # (which uses os.walk) can actually find the file.
import tools.skills_tool as _st
import agent.skill_utils as _su
monkeypatch.setattr(_st, "SKILLS_DIR", tmp_path)
monkeypatch.setattr(_su, "get_external_skills_dirs", lambda: [])
from tools.skills_tool import _find_all_skills from tools.skills_tool import _find_all_skills
skills = _find_all_skills() skills = _find_all_skills()
assert not any(s["name"] == "my-skill" for s in skills) assert not any(s["name"] == "my-skill" for s in skills)
@patch("tools.skills_tool._get_disabled_skill_names", return_value=set()) @patch("tools.skills_tool._get_disabled_skill_names", return_value=set())
@patch("tools.skills_tool.skill_matches_platform", return_value=True) @patch("tools.skills_tool.skill_matches_platform", return_value=True)
@patch("tools.skills_tool.SKILLS_DIR") def test_enabled_skill_included(self, mock_platform, mock_disabled, tmp_path, monkeypatch):
def test_enabled_skill_included(self, mock_dir, mock_platform, mock_disabled, tmp_path):
skill_dir = tmp_path / "my-skill" skill_dir = tmp_path / "my-skill"
skill_dir.mkdir() skill_dir.mkdir()
skill_md = skill_dir / "SKILL.md" skill_md = skill_dir / "SKILL.md"
skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent") skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
mock_dir.exists.return_value = True import tools.skills_tool as _st
mock_dir.rglob.return_value = [skill_md] import agent.skill_utils as _su
monkeypatch.setattr(_st, "SKILLS_DIR", tmp_path)
monkeypatch.setattr(_su, "get_external_skills_dirs", lambda: [])
from tools.skills_tool import _find_all_skills from tools.skills_tool import _find_all_skills
skills = _find_all_skills() skills = _find_all_skills()
assert any(s["name"] == "my-skill" for s in skills) assert any(s["name"] == "my-skill" for s in skills)
@patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"}) @patch("tools.skills_tool._get_disabled_skill_names", return_value={"my-skill"})
@patch("tools.skills_tool.skill_matches_platform", return_value=True) @patch("tools.skills_tool.skill_matches_platform", return_value=True)
@patch("tools.skills_tool.SKILLS_DIR") def test_skip_disabled_returns_all(self, mock_platform, mock_disabled, tmp_path, monkeypatch):
def test_skip_disabled_returns_all(self, mock_dir, mock_platform, mock_disabled, tmp_path):
"""skip_disabled=True ignores the disabled set (for config UI).""" """skip_disabled=True ignores the disabled set (for config UI)."""
skill_dir = tmp_path / "my-skill" skill_dir = tmp_path / "my-skill"
skill_dir.mkdir() skill_dir.mkdir()
skill_md = skill_dir / "SKILL.md" skill_md = skill_dir / "SKILL.md"
skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent") skill_md.write_text("---\nname: my-skill\ndescription: A test skill\n---\nContent")
mock_dir.exists.return_value = True import tools.skills_tool as _st
mock_dir.rglob.return_value = [skill_md] import agent.skill_utils as _su
monkeypatch.setattr(_st, "SKILLS_DIR", tmp_path)
monkeypatch.setattr(_su, "get_external_skills_dirs", lambda: [])
from tools.skills_tool import _find_all_skills from tools.skills_tool import _find_all_skills
skills = _find_all_skills(skip_disabled=True) skills = _find_all_skills(skip_disabled=True)
assert any(s["name"] == "my-skill" for s in skills) assert any(s["name"] == "my-skill" for s in skills)

View file

@ -184,7 +184,7 @@ def test_running_concurrent_worker_sees_is_interrupted(monkeypatch):
observed = {"saw_true": False, "poll_count": 0, "worker_tid": None} observed = {"saw_true": False, "poll_count": 0, "worker_tid": None}
worker_started = threading.Event() worker_started = threading.Event()
def polling_tool(name, args, task_id, call_id=None): def polling_tool(name, args, task_id, call_id=None, messages=None):
observed["worker_tid"] = threading.current_thread().ident observed["worker_tid"] = threading.current_thread().ident
worker_started.set() worker_started.set()
deadline = time.monotonic() + 5.0 deadline = time.monotonic() + 5.0

View file

@ -73,9 +73,12 @@ def _chat_response_with_memory_call():
"""Simulated chat completions response with a memory tool call.""" """Simulated chat completions response with a memory tool call."""
return SimpleNamespace( return SimpleNamespace(
choices=[SimpleNamespace( choices=[SimpleNamespace(
finish_reason="tool_calls",
message=SimpleNamespace( message=SimpleNamespace(
content=None, content=None,
tool_calls=[SimpleNamespace( tool_calls=[SimpleNamespace(
id="call_mem_0",
type="function",
function=SimpleNamespace( function=SimpleNamespace(
name="memory", name="memory",
arguments=json.dumps({ arguments=json.dumps({

View file

@ -283,7 +283,7 @@ class TestCamofoxVisionConfig:
with ( with (
patch("tools.browser_camofox.open", create=True) as mock_open, patch("tools.browser_camofox.open", create=True) as mock_open,
patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_llm, patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_llm,
patch("hermes_cli.config.load_config", return_value={"auxiliary": {"vision": {"temperature": 1, "timeout": 45}}}), patch("tools.browser_camofox.load_config", return_value={"auxiliary": {"vision": {"temperature": 1, "timeout": 45}}}),
): ):
mock_open.return_value.__enter__.return_value.read.return_value = b"fakepng" mock_open.return_value.__enter__.return_value.read.return_value = b"fakepng"
result = json.loads(camofox_vision("what is on the page?", annotate=True, task_id="t11")) result = json.loads(camofox_vision("what is on the page?", annotate=True, task_id="t11"))
@ -315,7 +315,7 @@ class TestCamofoxVisionConfig:
with ( with (
patch("tools.browser_camofox.open", create=True) as mock_open, patch("tools.browser_camofox.open", create=True) as mock_open,
patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_llm, patch("agent.auxiliary_client.call_llm", return_value=mock_response) as mock_llm,
patch("hermes_cli.config.load_config", return_value={"auxiliary": {"vision": {}}}), patch("tools.browser_camofox.load_config", return_value={"auxiliary": {"vision": {}}}),
): ):
mock_open.return_value.__enter__.return_value.read.return_value = b"fakepng" mock_open.return_value.__enter__.return_value.read.return_value = b"fakepng"
result = json.loads(camofox_vision("what is on the page?", annotate=True, task_id="t12")) result = json.loads(camofox_vision("what is on the page?", annotate=True, task_id="t12"))

View file

@ -351,7 +351,10 @@ def test_registered_in_browser_toolset():
entry = registry.get_entry("browser_cdp") entry = registry.get_entry("browser_cdp")
assert entry is not None assert entry is not None
assert entry.toolset == "browser" # browser_cdp lives in its own toolset so its stricter check_fn
# (requires reachable CDP endpoint) doesn't gate the whole browser
# toolset — see commit 96b0f3700.
assert entry.toolset == "browser-cdp"
assert entry.schema["name"] == "browser_cdp" assert entry.schema["name"] == "browser_cdp"
assert entry.schema["parameters"]["required"] == ["method"] assert entry.schema["parameters"]["required"] == ["method"]
assert "Chrome DevTools Protocol" in entry.schema["description"] assert "Chrome DevTools Protocol" in entry.schema["description"]

View file

@ -357,12 +357,33 @@ class TestWorkingDirResolution:
result = mgr.get_working_dir_for_path(str(subdir / "file.py")) result = mgr.get_working_dir_for_path(str(subdir / "file.py"))
assert result == str(project) assert result == str(project)
def test_falls_back_to_parent(self, tmp_path): def test_falls_back_to_parent(self, tmp_path, monkeypatch):
mgr = CheckpointManager(enabled=True) mgr = CheckpointManager(enabled=True)
filepath = tmp_path / "random" / "file.py" filepath = tmp_path / "random" / "file.py"
filepath.parent.mkdir(parents=True) filepath.parent.mkdir(parents=True)
filepath.write_text("x\\n") filepath.write_text("x\\n")
# The walk-up scan for project markers (.git, pyproject.toml, etc.)
# stops at tmp_path — otherwise stray markers in ``/tmp`` (e.g.
# ``/tmp/pyproject.toml`` left by other tools on the host) get
# picked up as the project root and this test flakes on shared CI.
import pathlib as _pl
_real_exists = _pl.Path.exists
def _guarded_exists(self):
s = str(self)
stop = str(tmp_path)
if not s.startswith(stop) and any(
s.endswith("/" + m) or s == "/" + m
for m in (".git", "pyproject.toml", "package.json",
"Cargo.toml", "go.mod", "Makefile", "pom.xml",
".hg", "Gemfile")
):
return False
return _real_exists(self)
monkeypatch.setattr(_pl.Path, "exists", _guarded_exists)
result = mgr.get_working_dir_for_path(str(filepath)) result = mgr.get_working_dir_for_path(str(filepath))
assert result == str(filepath.parent) assert result == str(filepath.parent)

View file

@ -247,7 +247,9 @@ class TestPatchHints:
from tools.file_tools import patch_tool from tools.file_tools import patch_tool
raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y") raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y")
assert "[Hint:" in raw # patch_tool surfaces the hint as a structured "_hint" field on the
# JSON error payload (not an inline "[Hint: ..." tail).
assert "_hint" in raw
assert "read_file" in raw assert "read_file" in raw
@patch("tools.file_tools._get_file_ops") @patch("tools.file_tools._get_file_ops")
@ -260,7 +262,7 @@ class TestPatchHints:
from tools.file_tools import patch_tool from tools.file_tools import patch_tool
raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y") raw = patch_tool(mode="replace", path="foo.py", old_string="x", new_string="y")
assert "[Hint:" not in raw assert "_hint" not in raw
class TestSearchHints: class TestSearchHints:

View file

@ -292,6 +292,7 @@ class TestBuiltinDiscovery:
def test_matches_previous_manual_builtin_tool_set(self): def test_matches_previous_manual_builtin_tool_set(self):
expected = { expected = {
"tools.browser_cdp_tool", "tools.browser_cdp_tool",
"tools.browser_dialog_tool",
"tools.browser_tool", "tools.browser_tool",
"tools.clarify_tool", "tools.clarify_tool",
"tools.code_execution_tool", "tools.code_execution_tool",

View file

@ -33,7 +33,12 @@ class TestWriteDenyExactPaths:
assert _is_write_denied(path) is True assert _is_write_denied(path) is True
def test_hermes_env(self): def test_hermes_env(self):
path = os.path.join(str(Path.home()), ".hermes", ".env") # ``.env`` under the active HERMES_HOME (profile-aware, not just
# ``~/.hermes``) must be write-denied. The hermetic test conftest
# points HERMES_HOME at a tempdir — resolve via get_hermes_home()
# to match the denylist.
from hermes_constants import get_hermes_home
path = str(get_hermes_home() / ".env")
assert _is_write_denied(path) is True assert _is_write_denied(path) is True
def test_shell_profiles(self): def test_shell_profiles(self):

View file

@ -110,8 +110,8 @@ class TestAgentCloseMethod:
agent.client = None agent.client = None
with patch("tools.process_registry.process_registry") as mock_registry, \ with patch("tools.process_registry.process_registry") as mock_registry, \
patch("tools.terminal_tool.cleanup_vm") as mock_cleanup_vm, \ patch("run_agent.cleanup_vm") as mock_cleanup_vm, \
patch("tools.browser_tool.cleanup_browser") as mock_cleanup_browser: patch("run_agent.cleanup_browser") as mock_cleanup_browser:
agent.close() agent.close()
mock_registry.kill_all.assert_called_once_with( mock_registry.kill_all.assert_called_once_with(
@ -172,9 +172,9 @@ class TestAgentCloseMethod:
with patch( with patch(
"tools.process_registry.process_registry" "tools.process_registry.process_registry"
) as mock_reg, patch( ) as mock_reg, patch(
"tools.terminal_tool.cleanup_vm" "run_agent.cleanup_vm"
) as mock_vm, patch( ) as mock_vm, patch(
"tools.browser_tool.cleanup_browser" "run_agent.cleanup_browser"
) as mock_browser: ) as mock_browser:
mock_reg.kill_all.side_effect = RuntimeError("boom") mock_reg.kill_all.side_effect = RuntimeError("boom")

View file

@ -2809,6 +2809,11 @@ def _kill_orphaned_mcp_children() -> None:
pids = dict(_stdio_pids) pids = dict(_stdio_pids)
_stdio_pids.clear() _stdio_pids.clear()
# Fast path: no tracked stdio PIDs to reap. Skip the SIGTERM/sleep/SIGKILL
# dance entirely — otherwise every MCP-free shutdown pays a 2s sleep tax.
if not pids:
return
# Phase 1: SIGTERM (graceful) # Phase 1: SIGTERM (graceful)
for pid, server_name in pids.items(): for pid, server_name in pids.items():
try: try: