mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
A flaky external probe in a tool's check_fn (e.g. check_terminal_requirements running `docker version` with a 5s timeout, momentarily timing out under load) would return False for a single get_tool_definitions() call. Because file tools delegate their check_fn to the terminal check, that one flake silently stripped read_file/write_file/patch/search_files AND terminal from whatever agent was being constructed at that instant — most visibly a delegate_task subagent, which then reported "Tool read_file does not exist". This explains both the intermittent (~80% success) user-session failures and the deterministic cron failures in #21658 / #5304. The existing _check_fn TTL cache made this worse: it cached the transient False for the full 30s window, poisoning every subagent spawned in that span. Fix: remember the last time each check_fn returned True; when a fresh probe fails within a short grace window of that success, treat it as a flake — serve the last-good True and do NOT cache the failure (so the next call re-probes). A failure with no recent success, or past the grace window, is honored normally so a backend that genuinely went down stops advertising its tools. Probe failures now log at WARNING regardless of quiet mode, making the previously-silent tool loss diagnosable in subagent (quiet) sessions. Co-authored-by: Stuart Horner <5261694+djstunami@users.noreply.github.com>
198 lines
7.4 KiB
Python
198 lines
7.4 KiB
Python
"""Tests for terminal/file tool availability in local dev environments."""
|
|
|
|
import importlib
|
|
|
|
import pytest
|
|
|
|
from model_tools import get_tool_definitions
|
|
|
|
terminal_tool_module = importlib.import_module("tools.terminal_tool")
|
|
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _clear_caches():
|
|
"""Invalidate check_fn and tool-definitions caches before each test
|
|
so that monkeypatched env vars / config take effect."""
|
|
from tools.registry import invalidate_check_fn_cache
|
|
from model_tools import _clear_tool_defs_cache
|
|
invalidate_check_fn_cache()
|
|
_clear_tool_defs_cache()
|
|
yield
|
|
invalidate_check_fn_cache()
|
|
_clear_tool_defs_cache()
|
|
|
|
|
|
class TestTerminalRequirements:
|
|
def test_local_backend_requirements(self, monkeypatch):
|
|
monkeypatch.setattr(
|
|
terminal_tool_module,
|
|
"_get_env_config",
|
|
lambda: {"env_type": "local"},
|
|
)
|
|
assert terminal_tool_module.check_terminal_requirements() is True
|
|
|
|
def test_terminal_and_file_tools_resolve_for_local_backend(self, monkeypatch):
|
|
monkeypatch.setattr(
|
|
terminal_tool_module,
|
|
"_get_env_config",
|
|
lambda: {"env_type": "local"},
|
|
)
|
|
tools = get_tool_definitions(enabled_toolsets=["terminal", "file"], quiet_mode=True)
|
|
names = {tool["function"]["name"] for tool in tools}
|
|
assert "terminal" in names
|
|
assert {"read_file", "write_file", "patch", "search_files"}.issubset(names)
|
|
|
|
def test_terminal_and_execute_code_tools_resolve_for_managed_modal(self, monkeypatch, tmp_path):
|
|
monkeypatch.setattr("tools.tool_backend_helpers.managed_nous_tools_enabled", lambda: True)
|
|
monkeypatch.setattr(terminal_tool_module, "managed_nous_tools_enabled", lambda: True)
|
|
monkeypatch.setenv("HOME", str(tmp_path))
|
|
monkeypatch.setenv("USERPROFILE", str(tmp_path))
|
|
monkeypatch.delenv("MODAL_TOKEN_ID", raising=False)
|
|
monkeypatch.delenv("MODAL_TOKEN_SECRET", raising=False)
|
|
monkeypatch.setattr(
|
|
terminal_tool_module,
|
|
"_get_env_config",
|
|
lambda: {"env_type": "modal", "modal_mode": "managed"},
|
|
)
|
|
monkeypatch.setattr(
|
|
terminal_tool_module,
|
|
"is_managed_tool_gateway_ready",
|
|
lambda _vendor: True,
|
|
)
|
|
tools = get_tool_definitions(enabled_toolsets=["terminal", "code_execution"], quiet_mode=True)
|
|
names = {tool["function"]["name"] for tool in tools}
|
|
|
|
assert "terminal" in names
|
|
assert "execute_code" in names
|
|
|
|
|
|
class TestCheckFnTransientFailureSuppression:
|
|
"""The check_fn TTL cache should absorb transient probe failures.
|
|
|
|
Regression coverage for #21658 / #5304: a single flaky
|
|
``check_terminal_requirements()`` (Docker daemon busy, probe timeout)
|
|
must not silently strip the terminal/file toolset from a subagent. After
|
|
a recent success, a transient False is treated as a flake; a failure with
|
|
no recent success — or past the grace window — is honored.
|
|
"""
|
|
|
|
@pytest.fixture(autouse=True)
|
|
def _reset(self):
|
|
from tools.registry import invalidate_check_fn_cache
|
|
|
|
invalidate_check_fn_cache()
|
|
yield
|
|
invalidate_check_fn_cache()
|
|
|
|
def test_transient_failure_after_success_is_suppressed(self, monkeypatch):
|
|
import tools.registry as reg
|
|
|
|
calls = {"n": 0}
|
|
|
|
def flaky():
|
|
calls["n"] += 1
|
|
# First call succeeds, second flakes (False).
|
|
return calls["n"] == 1
|
|
|
|
# Pin the cache clock so the TTL doesn't serve a stale entry between
|
|
# the two probes — we want both to actually run.
|
|
t = {"now": 1000.0}
|
|
monkeypatch.setattr(reg.time, "monotonic", lambda: t["now"])
|
|
|
|
assert reg._check_fn_cached(flaky) is True # records last-good
|
|
t["now"] += reg._CHECK_FN_TTL_SECONDS + 1 # expire the TTL cache
|
|
# Within grace window of the success → flake suppressed, stays True.
|
|
assert reg._check_fn_cached(flaky) is True
|
|
assert calls["n"] == 2 # the probe actually ran (not just cached)
|
|
|
|
def test_persistent_failure_after_grace_is_honored(self, monkeypatch):
|
|
import tools.registry as reg
|
|
|
|
def good():
|
|
return True
|
|
|
|
def bad():
|
|
return False
|
|
|
|
t = {"now": 1000.0}
|
|
monkeypatch.setattr(reg.time, "monotonic", lambda: t["now"])
|
|
|
|
assert reg._check_fn_cached(good) is True
|
|
# Advance past the failure grace window, then fail.
|
|
t["now"] += reg._CHECK_FN_FAILURE_GRACE_SECONDS + 1
|
|
# Different fn so last-good for `good` doesn't apply; bad has no success.
|
|
assert reg._check_fn_cached(bad) is False
|
|
|
|
def test_failure_with_no_prior_success_is_honored(self, monkeypatch):
|
|
import tools.registry as reg
|
|
|
|
def never():
|
|
return False
|
|
|
|
t = {"now": 1000.0}
|
|
monkeypatch.setattr(reg.time, "monotonic", lambda: t["now"])
|
|
assert reg._check_fn_cached(never) is False
|
|
|
|
def test_grace_expiry_lets_real_outage_through(self, monkeypatch):
|
|
import tools.registry as reg
|
|
|
|
state = {"ok": True}
|
|
|
|
def probe():
|
|
return state["ok"]
|
|
|
|
t = {"now": 1000.0}
|
|
monkeypatch.setattr(reg.time, "monotonic", lambda: t["now"])
|
|
|
|
assert reg._check_fn_cached(probe) is True
|
|
state["ok"] = False
|
|
# Just past TTL, within grace → flake suppressed.
|
|
t["now"] += reg._CHECK_FN_TTL_SECONDS + 1
|
|
assert reg._check_fn_cached(probe) is True
|
|
# Now move well past the grace window since the last success → honored.
|
|
t["now"] += reg._CHECK_FN_FAILURE_GRACE_SECONDS + 1
|
|
assert reg._check_fn_cached(probe) is False
|
|
|
|
def test_subagent_keeps_file_tools_through_docker_flake(self, monkeypatch):
|
|
"""End-to-end: a docker probe that flakes on the 2nd build keeps the
|
|
file/terminal toolset available for the subagent being constructed."""
|
|
import tools.registry as reg
|
|
|
|
flake = {"first": True}
|
|
|
|
def flaky_terminal_check():
|
|
if flake["first"]:
|
|
flake["first"] = False
|
|
return True
|
|
return False # transient flake on the subagent build
|
|
|
|
monkeypatch.setattr(
|
|
terminal_tool_module, "check_terminal_requirements", flaky_terminal_check
|
|
)
|
|
# file tools delegate to the same check via tools.check_file_requirements.
|
|
import tools as tools_pkg
|
|
|
|
monkeypatch.setattr(
|
|
tools_pkg, "check_file_requirements", flaky_terminal_check
|
|
)
|
|
|
|
t = {"now": 5000.0}
|
|
monkeypatch.setattr(reg.time, "monotonic", lambda: t["now"])
|
|
|
|
from model_tools import get_tool_definitions, _clear_tool_defs_cache
|
|
|
|
reg.invalidate_check_fn_cache()
|
|
_clear_tool_defs_cache()
|
|
# Parent build (probe ok) → records last-good.
|
|
parent = get_tool_definitions(enabled_toolsets=["terminal", "file"], quiet_mode=True)
|
|
assert "read_file" in {x["function"]["name"] for x in parent}
|
|
|
|
# Subagent build moments later: TTL expired, probe flakes False, but
|
|
# within grace → file/terminal tools must still resolve.
|
|
t["now"] += reg._CHECK_FN_TTL_SECONDS + 1
|
|
_clear_tool_defs_cache()
|
|
child = get_tool_definitions(enabled_toolsets=["terminal", "file"], quiet_mode=True)
|
|
child_names = {x["function"]["name"] for x in child}
|
|
assert {"read_file", "write_file", "patch", "search_files", "terminal"}.issubset(
|
|
child_names
|
|
)
|