mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-29 06:31:32 +00:00
feat(prompt): universal task-completion guidance + local Python toolchain probe (#34340)
* fix(codex): surface error code in Responses 'failed' status errors
When a Codex Responses turn ends with status=failed, the response carries
the failure details under `response.error` as
`{code, message, param, ...}`. The previous extractor pulled only
`message`, so users seeing a rate-limit failure got a bare "Slow down"
string indistinguishable from a generic stream truncation; an
internal_error with empty message degraded to a dict dump
("{'code': 'internal_error', 'message': ''}").
Extract a `_format_responses_error()` helper that:
- prefixes `code` when both code and message are present
(e.g. 'rate_limit_exceeded: Slow down')
- falls back to the bare `code` when message is empty
- accepts both dict and attribute-style payloads (SDK and JSON-RPC paths)
- preserves the prior status-only fallback when no error payload exists
Apply the same helper at the sibling site in
`codex_app_server_session.run_turn()` so codex-CLI subprocess turn
failures get the same treatment.
Tests:
- 8 new unit tests for `_format_responses_error` covering both shapes,
empty/missing fields, non-string fields, and the status-only fallback.
- 2 regression tests on `_normalize_codex_response` for failed status
with and without a code, asserting the exact RuntimeError message.
- All 3603 tests in tests/agent/ pass.
Adapted from anomalyco/opencode#28757.
* feat(prompt): universal task-completion guidance + local Python toolchain probe
Two cross-model failure modes get a single-line answer in the cached
system prompt. Both gated by config (default on), both add zero overhead
when not needed, both verified via real AIAgent prompt builds.
## What changed
`TASK_COMPLETION_GUIDANCE` — short prompt block applied to ALL models.
Targets two failure modes observed on a real Sarasota real-estate build
task: (1) Opus stopped after writing an 85-byte stub and gave a prose
response with finish_reason=stop on call #3 of 90; (2) DeepSeek pushed
through a PEP-668 wall, then returned fabricated listings instead of
admitting the blocker. Both behaviors are model-family-agnostic, so the
guidance lives outside the existing tool_use_enforcement gate (~192
tokens, paid once per session via prefix cache).
`tools/env_probe.py` — local Python toolchain probe. Detects
python3/pip/uv/PEP-668 state and emits ONE short line in the system
prompt when something is non-default. Emits NOTHING when the env is
clean (zero token cost for normal users). Skipped entirely for remote
terminal backends (docker/modal/ssh) — they have their own probe.
Example output on a broken environment (the actual case):
Python toolchain: python3=3.11.15 (no pip module),
python=missing (use python3), pip→python3.12 (mismatch),
PEP 668=yes (use venv or uv).
## Config
Both flags live under `agent.` in config.yaml, default True:
agent:
task_completion_guidance: true # universal "finish the job" block
environment_probe: true # local Python toolchain hints
Neither addition required a `_config_version` bump — deep-merge fills
defaults in for existing user configs.
## Validation
| Test surface | Result |
|---|---|
| tests/tools/test_env_probe.py | 10/10 pass (probe unit) |
| tests/run_agent/test_run_agent.py — new classes | 8/8 pass (integration) |
| TestToolUseEnforcementConfig | 17/17 pass (no regression) |
| TestBuildSystemPrompt | 9/9 pass (no regression) |
| TestInvalidateSystemPrompt | 2/2 pass (no regression) |
| tests/agent/test_prompt_builder.py | 124/124 pass (no regression) |
| tests/hermes_cli/ | 5662/5662 pass (config defaults) |
| E2E AIAgent build (broken env) | Both blocks present, 2,178 chars |
| E2E AIAgent build (clean env) | 771-char net overhead, env probe silent |
This commit is contained in:
parent
75d2c081c9
commit
a4d8f0f62a
10 changed files with 819 additions and 6 deletions
|
|
@ -1201,6 +1201,18 @@ def init_agent(
|
|||
_agent_section = {}
|
||||
agent._tool_use_enforcement = _agent_section.get("tool_use_enforcement", "auto")
|
||||
|
||||
# Universal task-completion guidance toggle. Default True. Surfaced
|
||||
# as a separate flag from tool_use_enforcement because the guidance
|
||||
# applies to ALL models, not just the model families enforcement
|
||||
# targets.
|
||||
agent._task_completion_guidance = bool(_agent_section.get("task_completion_guidance", True))
|
||||
|
||||
# Local Python toolchain probe toggle. Default True. When False,
|
||||
# the probe is skipped entirely (no subprocess calls, no system-prompt
|
||||
# line). Useful for users on exotic setups where the probe heuristics
|
||||
# are noisy.
|
||||
agent._environment_probe = bool(_agent_section.get("environment_probe", True))
|
||||
|
||||
# App-level API retry count (wraps each model API call). Default 3,
|
||||
# overridable via agent.api_max_retries in config.yaml. See #11616.
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -980,6 +980,48 @@ def _extract_responses_reasoning_text(item: Any) -> str:
|
|||
return ""
|
||||
|
||||
|
||||
def _format_responses_error(error_obj: Any, response_status: str) -> str:
|
||||
"""Build a human-readable error string from a Responses ``response.error`` payload.
|
||||
|
||||
The OpenAI Responses API carries failure details under ``response.error``
|
||||
on terminal ``response.failed`` events, in the shape
|
||||
``{"code": "rate_limit_exceeded", "message": "Slow down", "param": ...}``.
|
||||
Earlier code only surfaced ``message``, which left users staring at bare
|
||||
strings like ``"Slow down"`` while the failure mode (rate limit vs
|
||||
context-length vs internal_error vs model-overloaded) was hidden in
|
||||
``code``. We now prefix ``code`` when both are present so consumers can
|
||||
distinguish failure modes without parsing the bare message.
|
||||
|
||||
Falls back to ``code`` alone when ``message`` is empty, and to a stable
|
||||
default referencing the response status when no error payload is
|
||||
available at all. Adapted from anomalyco/opencode#28757.
|
||||
"""
|
||||
# Pull code and message from either dict or attribute-style payloads.
|
||||
code: Any = None
|
||||
message: Any = None
|
||||
if isinstance(error_obj, dict):
|
||||
code = error_obj.get("code")
|
||||
message = error_obj.get("message")
|
||||
elif error_obj is not None:
|
||||
code = getattr(error_obj, "code", None)
|
||||
message = getattr(error_obj, "message", None)
|
||||
|
||||
code_str = str(code).strip() if isinstance(code, str) else (str(code).strip() if code else "")
|
||||
message_str = str(message).strip() if isinstance(message, str) else (str(message).strip() if message else "")
|
||||
|
||||
if code_str and message_str:
|
||||
return f"{code_str}: {message_str}"
|
||||
if message_str:
|
||||
return message_str
|
||||
if code_str:
|
||||
return code_str
|
||||
if error_obj:
|
||||
# Last-resort: stringify whatever the provider sent so it's at least
|
||||
# visible in logs/UI rather than silently swallowed.
|
||||
return str(error_obj)
|
||||
return f"Responses API returned status '{response_status}'"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Full response normalization
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -1023,10 +1065,7 @@ def _normalize_codex_response(
|
|||
|
||||
if response_status in {"failed", "cancelled"}:
|
||||
error_obj = getattr(response, "error", None)
|
||||
if isinstance(error_obj, dict):
|
||||
error_msg = error_obj.get("message") or str(error_obj)
|
||||
else:
|
||||
error_msg = str(error_obj) if error_obj else f"Responses API returned status '{response_status}'"
|
||||
error_msg = _format_responses_error(error_obj, response_status)
|
||||
raise RuntimeError(error_msg)
|
||||
|
||||
content_parts: List[str] = []
|
||||
|
|
|
|||
|
|
@ -262,6 +262,37 @@ TOOL_USE_ENFORCEMENT_GUIDANCE = (
|
|||
# Add new patterns here when a model family needs explicit steering.
|
||||
TOOL_USE_ENFORCEMENT_MODELS = ("gpt", "codex", "gemini", "gemma", "grok", "glm", "qwen", "deepseek")
|
||||
|
||||
# Universal "finish the job" guidance — applied to ALL models, not gated
|
||||
# by model family. Addresses two cross-model failure modes:
|
||||
# 1. Stopping after a stub: writing a tiny file or running one command
|
||||
# and then ending the turn with a description of the plan instead
|
||||
# of the finished artifact. (Observed on Opus during a real
|
||||
# Sarasota real-estate build task: 3 API calls, 85-byte file,
|
||||
# one terminal command, finish_reason=stop.)
|
||||
# 2. Fabricating output when a real path is blocked. When `pip` or a
|
||||
# tool fails, some models will synthesize plausible-looking results
|
||||
# (fake addresses, fake JSON, fake numbers) instead of reporting
|
||||
# the blocker. (Observed on DeepSeek v4-flash on the same task:
|
||||
# pushed through PEP-668 wall, then returned fabricated listings.)
|
||||
#
|
||||
# Short on purpose. This block is shipped to every user, every session,
|
||||
# in the cached system prompt — token cost is paid once at install and
|
||||
# then amortised across all sessions via prefix caching. Keep it tight.
|
||||
TASK_COMPLETION_GUIDANCE = (
|
||||
"# Finishing the job\n"
|
||||
"When the user asks you to build, run, or verify something, the deliverable is "
|
||||
"a working artifact backed by real tool output — not a description of one. "
|
||||
"Do not stop after writing a stub, a plan, or a single command. Keep working "
|
||||
"until you have actually exercised the code or produced the requested result, "
|
||||
"then report what real execution returned.\n"
|
||||
"If a tool, install, or network call fails and blocks the real path, say so "
|
||||
"directly and try an alternative (different package manager, different "
|
||||
"approach, ask the user). NEVER substitute plausible-looking fabricated "
|
||||
"output (made-up data, invented file contents, synthesised API responses) "
|
||||
"for results you couldn't actually produce. Reporting a blocker honestly "
|
||||
"is always better than inventing a result."
|
||||
)
|
||||
|
||||
# OpenAI GPT/Codex-specific execution guidance. Addresses known failure modes
|
||||
# where GPT models abandon work on partial results, skip prerequisite lookups,
|
||||
# hallucinate instead of using tools, and declare "done" without verification.
|
||||
|
|
|
|||
|
|
@ -37,6 +37,7 @@ from agent.prompt_builder import (
|
|||
PLATFORM_HINTS,
|
||||
SESSION_SEARCH_GUIDANCE,
|
||||
SKILLS_GUIDANCE,
|
||||
TASK_COMPLETION_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_GUIDANCE,
|
||||
TOOL_USE_ENFORCEMENT_MODELS,
|
||||
)
|
||||
|
|
@ -100,6 +101,15 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
|||
# Pointer to the hermes-agent skill + docs for user questions about Hermes itself.
|
||||
stable_parts.append(HERMES_AGENT_HELP_GUIDANCE)
|
||||
|
||||
# Universal task-completion / no-fabrication guidance. Applied to ALL
|
||||
# models regardless of tool_use_enforcement gating — the failure modes
|
||||
# this targets (stopping after a stub; fabricating output when a real
|
||||
# path is blocked) are not model-family specific. Gated only by
|
||||
# config.yaml ``agent.task_completion_guidance`` (default True) so
|
||||
# users who want a leaner prompt can turn it off.
|
||||
if getattr(agent, "_task_completion_guidance", True) and agent.valid_tool_names:
|
||||
stable_parts.append(TASK_COMPLETION_GUIDANCE)
|
||||
|
||||
# Tool-aware behavioral guidance: only inject when the tools are loaded
|
||||
tool_guidance = []
|
||||
if "memory" in agent.valid_tool_names:
|
||||
|
|
@ -205,6 +215,23 @@ def build_system_prompt_parts(agent: Any, system_message: Optional[str] = None)
|
|||
if _env_hints:
|
||||
stable_parts.append(_env_hints)
|
||||
|
||||
# Local Python toolchain probe — names python/pip/uv/PEP-668 state when
|
||||
# something is non-default so the model can pick the right install
|
||||
# strategy without discovering by failure. Emits a single line; emits
|
||||
# NOTHING when the environment is clean (no token cost). Skipped
|
||||
# entirely for remote terminal backends (the host's Python state is
|
||||
# irrelevant when tools run inside docker/modal/ssh). Gated by
|
||||
# config.yaml ``agent.environment_probe`` (default True).
|
||||
if getattr(agent, "_environment_probe", True):
|
||||
try:
|
||||
from tools.env_probe import get_environment_probe_line
|
||||
_probe_line = get_environment_probe_line()
|
||||
if _probe_line:
|
||||
stable_parts.append(_probe_line)
|
||||
except Exception:
|
||||
# Probe failure must never block prompt build.
|
||||
pass
|
||||
|
||||
# Active-profile hint — names the Hermes profile the agent is running
|
||||
# under so it doesn't conflate ~/.hermes/skills/ (default profile) with
|
||||
# ~/.hermes/profiles/<active>/skills/ (this profile's). Deterministic
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import time
|
|||
from dataclasses import dataclass, field
|
||||
from typing import Any, Callable, Optional
|
||||
|
||||
from agent.codex_responses_adapter import _format_responses_error
|
||||
from agent.redact import redact_sensitive_text
|
||||
from agent.transports.codex_app_server import (
|
||||
CodexAppServerClient,
|
||||
|
|
@ -581,7 +582,7 @@ class CodexAppServerSession:
|
|||
(note.get("params") or {}).get("turn") or {}
|
||||
).get("error")
|
||||
if err_obj:
|
||||
err_msg = err_obj.get("message") or str(err_obj)
|
||||
err_msg = _format_responses_error(err_obj, str(turn_status))
|
||||
# If the turn failed for an auth/refresh reason,
|
||||
# rewrite the error into a re-auth hint AND mark
|
||||
# the session for retirement.
|
||||
|
|
|
|||
|
|
@ -669,6 +669,20 @@ DEFAULT_CONFIG = {
|
|||
# (force on/off for all models), or a list of model-name substrings
|
||||
# to match (e.g. ["gpt", "codex", "gemini", "qwen"]).
|
||||
"tool_use_enforcement": "auto",
|
||||
# Universal "finish the job" guidance — short prompt block applied to
|
||||
# all models that targets two cross-family failure modes: (1) stopping
|
||||
# after a stub instead of finishing the artifact, (2) fabricating
|
||||
# plausible-looking output when a real path is blocked. Costs ~80
|
||||
# tokens in the cached system prompt. Set False to disable globally.
|
||||
"task_completion_guidance": True,
|
||||
# Local-environment toolchain probe — surfaces Python/pip/uv/PEP-668
|
||||
# state in the system prompt when something non-default is detected
|
||||
# (e.g. python3 has no pip module, pip→python version mismatch, PEP
|
||||
# 668 enforcement without uv). Costs zero tokens when the env is
|
||||
# clean (probe emits nothing). Skipped for remote terminal backends
|
||||
# (docker/modal/ssh — they have their own probe). Set False to
|
||||
# disable entirely.
|
||||
"environment_probe": True,
|
||||
# Staged inactivity warning: send a warning to the user at this
|
||||
# threshold before escalating to a full timeout. The warning fires
|
||||
# once per run and does not interrupt the agent. 0 = disable warning.
|
||||
|
|
|
|||
|
|
@ -1,6 +1,11 @@
|
|||
from types import SimpleNamespace
|
||||
|
||||
from agent.codex_responses_adapter import _normalize_codex_response
|
||||
import pytest
|
||||
|
||||
from agent.codex_responses_adapter import (
|
||||
_format_responses_error,
|
||||
_normalize_codex_response,
|
||||
)
|
||||
|
||||
|
||||
def test_normalize_codex_response_drops_transient_rs_tmp_reasoning_items():
|
||||
|
|
@ -61,3 +66,111 @@ def test_normalize_codex_response_treats_summary_only_reasoning_as_incomplete():
|
|||
assert assistant_message.content == ""
|
||||
assert assistant_message.reasoning == "still thinking"
|
||||
assert assistant_message.codex_reasoning_items is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _format_responses_error — adapted from anomalyco/opencode#28757.
|
||||
# Provider failures should surface BOTH the code (rate_limit_exceeded /
|
||||
# context_length_exceeded / internal_error / server_error) and the message,
|
||||
# so consumers can tell rate limits apart from context-length failures and
|
||||
# both apart from generic stream drops.
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def test_format_responses_error_combines_code_and_message():
|
||||
err = {"code": "rate_limit_exceeded", "message": "Slow down"}
|
||||
assert _format_responses_error(err, "failed") == "rate_limit_exceeded: Slow down"
|
||||
|
||||
|
||||
def test_format_responses_error_message_only():
|
||||
err = {"message": "Upstream model unavailable"}
|
||||
assert _format_responses_error(err, "failed") == "Upstream model unavailable"
|
||||
|
||||
|
||||
def test_format_responses_error_code_only_when_message_empty():
|
||||
# Some providers/proxies emit a code with an empty message body. We
|
||||
# used to fall back to ``str(error_obj)`` — a dict dump — which leaked
|
||||
# ``{'code': 'internal_error', 'message': ''}`` into chat output. Now
|
||||
# the bare code is surfaced, which is the meaningful field.
|
||||
err = {"code": "internal_error", "message": ""}
|
||||
assert _format_responses_error(err, "failed") == "internal_error"
|
||||
|
||||
|
||||
def test_format_responses_error_code_only_when_message_missing():
|
||||
err = {"code": "server_error"}
|
||||
assert _format_responses_error(err, "failed") == "server_error"
|
||||
|
||||
|
||||
def test_format_responses_error_attribute_style_payload():
|
||||
# SDK objects expose ``code``/``message`` as attributes rather than dict
|
||||
# keys. The helper must accept both shapes since the Responses SDK
|
||||
# returns SimpleNamespace-style objects on ``response.failed``.
|
||||
err = SimpleNamespace(code="context_length_exceeded", message="too long")
|
||||
assert _format_responses_error(err, "failed") == "context_length_exceeded: too long"
|
||||
|
||||
|
||||
def test_format_responses_error_falls_back_to_status_when_empty():
|
||||
assert (
|
||||
_format_responses_error(None, "failed")
|
||||
== "Responses API returned status 'failed'"
|
||||
)
|
||||
assert (
|
||||
_format_responses_error(None, "cancelled")
|
||||
== "Responses API returned status 'cancelled'"
|
||||
)
|
||||
|
||||
|
||||
def test_format_responses_error_stringifies_opaque_payload():
|
||||
# Last-resort: a provider sent something that isn't a dict and has no
|
||||
# code/message attributes. Surface its repr rather than swallow it
|
||||
# silently — at least it's visible in logs.
|
||||
assert _format_responses_error("opaque sentinel", "failed") == "opaque sentinel"
|
||||
|
||||
|
||||
def test_format_responses_error_ignores_non_string_code_message():
|
||||
# Defensive: a malformed gateway could send numbers/objects in these
|
||||
# fields. We don't want to crash; we want a best-effort string.
|
||||
err = {"code": 500, "message": None}
|
||||
assert _format_responses_error(err, "failed") == "500"
|
||||
|
||||
|
||||
def test_normalize_codex_response_failed_includes_code_in_error():
|
||||
"""Regression: response_status == 'failed' should surface the error
|
||||
code, not just the message. Used to leak a bare 'Slow down' string
|
||||
that was indistinguishable from a generic stream truncation."""
|
||||
# ``output`` non-empty so we don't trip the "no output items" guard
|
||||
# before reaching the failed-status branch. Real failed responses
|
||||
# often DO carry a partial message item alongside the error.
|
||||
response = SimpleNamespace(
|
||||
status="failed",
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
status="incomplete",
|
||||
content=[SimpleNamespace(type="output_text", text="partial")],
|
||||
),
|
||||
],
|
||||
error={"code": "rate_limit_exceeded", "message": "Slow down"},
|
||||
)
|
||||
with pytest.raises(RuntimeError, match=r"^rate_limit_exceeded: Slow down$"):
|
||||
_normalize_codex_response(response)
|
||||
|
||||
|
||||
def test_normalize_codex_response_failed_with_message_only():
|
||||
"""Backwards-compat: a failed response with only a message field
|
||||
(no code) should still surface that message verbatim."""
|
||||
response = SimpleNamespace(
|
||||
status="failed",
|
||||
output=[
|
||||
SimpleNamespace(
|
||||
type="message",
|
||||
role="assistant",
|
||||
status="incomplete",
|
||||
content=[SimpleNamespace(type="output_text", text="partial")],
|
||||
),
|
||||
],
|
||||
error={"message": "model error"},
|
||||
)
|
||||
with pytest.raises(RuntimeError, match=r"^model error$"):
|
||||
_normalize_codex_response(response)
|
||||
|
|
|
|||
|
|
@ -1323,6 +1323,178 @@ class TestToolUseEnforcementConfig:
|
|||
assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt
|
||||
|
||||
|
||||
class TestTaskCompletionGuidance:
|
||||
"""Tests for the universal task-completion / no-fabrication guidance
|
||||
(config.yaml ``agent.task_completion_guidance``).
|
||||
|
||||
Unlike tool_use_enforcement, this block is model-family-agnostic — it
|
||||
targets cross-model failure modes (stopping after a stub; fabricating
|
||||
output when blocked) and should appear for every model by default."""
|
||||
|
||||
def _make_agent(self, model="anthropic/claude-opus-4.8",
|
||||
task_completion_guidance=True, **extra_cfg):
|
||||
agent_cfg = {"task_completion_guidance": task_completion_guidance}
|
||||
agent_cfg.update(extra_cfg)
|
||||
with (
|
||||
patch(
|
||||
"run_agent.get_tool_definitions",
|
||||
return_value=_make_tool_defs("terminal", "web_search"),
|
||||
),
|
||||
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||
patch("run_agent.OpenAI"),
|
||||
patch(
|
||||
"hermes_cli.config.load_config",
|
||||
return_value={"agent": agent_cfg},
|
||||
),
|
||||
):
|
||||
a = AIAgent(
|
||||
model=model,
|
||||
api_key="test-key-1234567890",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
a.client = MagicMock()
|
||||
return a
|
||||
|
||||
def test_default_injects_for_claude(self):
|
||||
"""The block must reach Claude by default — that's the
|
||||
primary motivating model family."""
|
||||
from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
|
||||
agent = self._make_agent(model="anthropic/claude-opus-4.8")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TASK_COMPLETION_GUIDANCE in prompt
|
||||
|
||||
def test_default_injects_for_deepseek(self):
|
||||
"""And for DeepSeek — the other model that failed the Sarasota
|
||||
real-estate task by fabricating output."""
|
||||
from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
|
||||
agent = self._make_agent(model="deepseek/deepseek-v4-flash")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TASK_COMPLETION_GUIDANCE in prompt
|
||||
|
||||
def test_default_injects_for_gpt(self):
|
||||
"""Also reaches model families that already get enforcement —
|
||||
it's additive, not exclusive."""
|
||||
from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
|
||||
agent = self._make_agent(model="openai/gpt-5.4")
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TASK_COMPLETION_GUIDANCE in prompt
|
||||
|
||||
def test_false_disables(self):
|
||||
from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
|
||||
agent = self._make_agent(
|
||||
model="anthropic/claude-opus-4.8", task_completion_guidance=False
|
||||
)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert TASK_COMPLETION_GUIDANCE not in prompt
|
||||
|
||||
def test_no_tools_no_injection(self):
|
||||
"""Same gate as tool_use_enforcement — no tools means no guidance.
|
||||
The guidance refers to ``tool calls`` and ``tool output``; without
|
||||
tools it would be advice for a capability the agent doesn't have."""
|
||||
from agent.prompt_builder import TASK_COMPLETION_GUIDANCE
|
||||
with (
|
||||
patch("run_agent.get_tool_definitions", return_value=[]),
|
||||
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||
patch("run_agent.OpenAI"),
|
||||
patch(
|
||||
"hermes_cli.config.load_config",
|
||||
return_value={"agent": {"task_completion_guidance": True}},
|
||||
),
|
||||
):
|
||||
a = AIAgent(
|
||||
api_key="test-key-1234567890",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
enabled_toolsets=[],
|
||||
)
|
||||
a.client = MagicMock()
|
||||
assert TASK_COMPLETION_GUIDANCE not in a._build_system_prompt()
|
||||
|
||||
|
||||
class TestEnvironmentProbeIntegration:
|
||||
"""Tests for the local Python toolchain probe wiring (config.yaml
|
||||
``agent.environment_probe``). The probe itself is unit-tested in
|
||||
tests/tools/test_env_probe.py; this class confirms it lands in the
|
||||
system prompt when enabled and stays out when disabled."""
|
||||
|
||||
def _make_agent(self, model="anthropic/claude-opus-4.8",
|
||||
environment_probe=True):
|
||||
with (
|
||||
patch(
|
||||
"run_agent.get_tool_definitions",
|
||||
return_value=_make_tool_defs("terminal"),
|
||||
),
|
||||
patch("run_agent.check_toolset_requirements", return_value={}),
|
||||
patch("run_agent.OpenAI"),
|
||||
patch(
|
||||
"hermes_cli.config.load_config",
|
||||
return_value={"agent": {"environment_probe": environment_probe}},
|
||||
),
|
||||
):
|
||||
a = AIAgent(
|
||||
model=model,
|
||||
api_key="test-key-1234567890",
|
||||
base_url="https://openrouter.ai/api/v1",
|
||||
quiet_mode=True,
|
||||
skip_context_files=True,
|
||||
skip_memory=True,
|
||||
)
|
||||
a.client = MagicMock()
|
||||
return a
|
||||
|
||||
def test_probe_appears_when_problem_detected(self, monkeypatch):
|
||||
"""When the probe finds something off, the line lands in the prompt."""
|
||||
from tools import env_probe
|
||||
env_probe._reset_cache_for_tests()
|
||||
monkeypatch.setattr(env_probe, "_python_version_of",
|
||||
lambda b: {"python3": "3.11.15"}.get(b))
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
|
||||
monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
|
||||
monkeypatch.setattr(env_probe.shutil, "which",
|
||||
lambda name: None if name == "uv" else "/usr/bin/" + name)
|
||||
|
||||
agent = self._make_agent(environment_probe=True)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert "Python toolchain:" in prompt
|
||||
assert "3.11.15" in prompt
|
||||
|
||||
def test_probe_silent_on_clean_env(self, monkeypatch):
|
||||
"""Clean environment → probe emits nothing → no line in prompt."""
|
||||
from tools import env_probe
|
||||
env_probe._reset_cache_for_tests()
|
||||
monkeypatch.setattr(env_probe, "_python_version_of",
|
||||
lambda b: "3.13.3" if b == "python3" else None)
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: False)
|
||||
monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.13")
|
||||
monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
|
||||
|
||||
agent = self._make_agent(environment_probe=True)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert "Python toolchain:" not in prompt
|
||||
|
||||
def test_probe_disabled_by_config(self, monkeypatch):
|
||||
"""Even with detectable problems, the probe stays out when disabled."""
|
||||
from tools import env_probe
|
||||
env_probe._reset_cache_for_tests()
|
||||
monkeypatch.setattr(env_probe, "_python_version_of",
|
||||
lambda b: {"python3": "3.11.15"}.get(b))
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
|
||||
monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
|
||||
monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
|
||||
|
||||
agent = self._make_agent(environment_probe=False)
|
||||
prompt = agent._build_system_prompt()
|
||||
assert "Python toolchain:" not in prompt
|
||||
|
||||
|
||||
class TestInvalidateSystemPrompt:
|
||||
def test_clears_cache(self, agent):
|
||||
agent._cached_system_prompt = "cached value"
|
||||
|
|
|
|||
157
tests/tools/test_env_probe.py
Normal file
157
tests/tools/test_env_probe.py
Normal file
|
|
@ -0,0 +1,157 @@
|
|||
"""Tests for tools/env_probe.py — local Python toolchain probe."""
|
||||
|
||||
import sys
|
||||
|
||||
import pytest
|
||||
|
||||
from tools import env_probe
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_probe_cache():
|
||||
"""Each test starts with a clean cache."""
|
||||
env_probe._reset_cache_for_tests()
|
||||
yield
|
||||
env_probe._reset_cache_for_tests()
|
||||
|
||||
|
||||
class TestSilentWhenHealthy:
|
||||
"""The probe must emit nothing when the environment is clean — otherwise
|
||||
every prompt for every user pays an unnecessary token tax."""
|
||||
|
||||
def test_clean_env_returns_empty(self, monkeypatch):
|
||||
"""python3 + pip module + no PEP 668 → silent."""
|
||||
monkeypatch.setattr(env_probe, "_python_version_of",
|
||||
lambda b: "3.13.3" if b == "python3" else None)
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: False)
|
||||
monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.13")
|
||||
monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
|
||||
assert env_probe.get_environment_probe_line() == ""
|
||||
|
||||
def test_pep668_with_uv_returns_empty(self, monkeypatch):
|
||||
"""PEP 668 alone shouldn't trigger output if uv is installed —
|
||||
agent has a viable install path."""
|
||||
monkeypatch.setattr(env_probe, "_python_version_of",
|
||||
lambda b: "3.12.4" if b == "python3" else None)
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
|
||||
monkeypatch.setattr(env_probe.shutil, "which",
|
||||
lambda name: "/usr/local/bin/uv" if name == "uv" else None)
|
||||
assert env_probe.get_environment_probe_line() == ""
|
||||
|
||||
|
||||
class TestEmitsOnRealProblems:
|
||||
"""The probe must produce a usable line for the real failure modes
|
||||
that drove this feature."""
|
||||
|
||||
def test_allen_scenario_python_version_mismatch(self, monkeypatch):
|
||||
"""python3 is 3.11 (no pip module), pip on PATH is 3.12, PEP 668 on,
|
||||
no uv — the exact scenario from the Sarasota real-estate task."""
|
||||
monkeypatch.setattr(env_probe, "_python_version_of",
|
||||
lambda b: {"python3": "3.11.15", "python": None}.get(b))
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
|
||||
monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
|
||||
monkeypatch.setattr(env_probe.shutil, "which",
|
||||
lambda name: None if name == "uv" else "/usr/bin/" + name)
|
||||
|
||||
line = env_probe.get_environment_probe_line()
|
||||
assert line # not silent
|
||||
# Single line — must not blow up the system prompt.
|
||||
assert "\n" not in line
|
||||
# Names the real toolchain state
|
||||
assert "3.11.15" in line
|
||||
assert "no pip module" in line
|
||||
assert "mismatch" in line
|
||||
assert "PEP 668" in line
|
||||
# Points at the right escape hatch
|
||||
assert "venv" in line or "uv" in line
|
||||
|
||||
def test_missing_python3_is_named(self, monkeypatch):
|
||||
"""If python3 isn't installed at all, say so."""
|
||||
monkeypatch.setattr(env_probe, "_python_version_of", lambda b: None)
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
|
||||
monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: False)
|
||||
monkeypatch.setattr(env_probe, "_pip_python_version", lambda: None)
|
||||
monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
|
||||
|
||||
line = env_probe.get_environment_probe_line()
|
||||
assert "python3=missing" in line
|
||||
|
||||
def test_python_missing_but_python3_present(self, monkeypatch):
|
||||
"""Common on Debian: only python3 exists, agent shouldn't type
|
||||
`python`."""
|
||||
monkeypatch.setattr(env_probe, "_python_version_of",
|
||||
lambda b: "3.12.4" if b == "python3" else None)
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
|
||||
monkeypatch.setattr(env_probe.shutil, "which",
|
||||
lambda name: None if name == "uv" else "/usr/bin/" + name)
|
||||
|
||||
line = env_probe.get_environment_probe_line()
|
||||
# `python=missing` only matters in the non-silent path; PEP 668 (without
|
||||
# uv) is what brings us off-silent here, so check both signals.
|
||||
assert "PEP 668" in line
|
||||
assert "python=missing" in line
|
||||
|
||||
|
||||
class TestSkipsRemoteBackends:
|
||||
"""Remote backends have their own probe; this one must stay out."""
|
||||
|
||||
def test_docker_returns_empty(self, monkeypatch):
|
||||
monkeypatch.setenv("TERMINAL_ENV", "docker")
|
||||
# Even with a broken local env, docker must emit nothing.
|
||||
monkeypatch.setattr(env_probe, "_python_version_of", lambda b: None)
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: False)
|
||||
assert env_probe.get_environment_probe_line() == ""
|
||||
|
||||
def test_modal_returns_empty(self, monkeypatch):
|
||||
monkeypatch.setenv("TERMINAL_ENV", "modal")
|
||||
assert env_probe.get_environment_probe_line() == ""
|
||||
|
||||
def test_ssh_returns_empty(self, monkeypatch):
|
||||
monkeypatch.setenv("TERMINAL_ENV", "ssh")
|
||||
assert env_probe.get_environment_probe_line() == ""
|
||||
|
||||
|
||||
class TestCaching:
|
||||
"""The probe runs once per process — the result is deterministic for
|
||||
the lifetime of the agent."""
|
||||
|
||||
def test_result_cached(self, monkeypatch):
|
||||
calls = []
|
||||
|
||||
def counting_version(b):
|
||||
calls.append(b)
|
||||
return "3.12.4" if b == "python3" else None
|
||||
|
||||
monkeypatch.setattr(env_probe, "_python_version_of", counting_version)
|
||||
monkeypatch.setattr(env_probe, "_has_pip_module", lambda b: True)
|
||||
monkeypatch.setattr(env_probe, "_detect_pep668", lambda b: False)
|
||||
monkeypatch.setattr(env_probe, "_pip_python_version", lambda: "3.12")
|
||||
monkeypatch.setattr(env_probe.shutil, "which", lambda name: None)
|
||||
|
||||
env_probe.get_environment_probe_line()
|
||||
env_probe.get_environment_probe_line()
|
||||
env_probe.get_environment_probe_line()
|
||||
|
||||
# Only the first call probes — caller-counting confirms it.
|
||||
# Two calls (python3 + python) on first invocation, zero after.
|
||||
assert len(calls) == 2
|
||||
|
||||
|
||||
class TestRobustness:
|
||||
"""The probe must NEVER crash the prompt build."""
|
||||
|
||||
def test_subprocess_failure_returns_empty(self, monkeypatch):
|
||||
"""If every subprocess fails, just stay silent."""
|
||||
def boom(*a, **kw):
|
||||
raise OSError("simulated")
|
||||
monkeypatch.setattr(env_probe.subprocess, "run", boom)
|
||||
# Should not raise, should just return ""
|
||||
result = env_probe.get_environment_probe_line()
|
||||
# Whatever the result is, it must be a string
|
||||
assert isinstance(result, str)
|
||||
247
tools/env_probe.py
Normal file
247
tools/env_probe.py
Normal file
|
|
@ -0,0 +1,247 @@
|
|||
"""Local-environment toolchain probe for the system prompt.
|
||||
|
||||
When the terminal backend is local (the agent's tools run on the same
|
||||
machine as Hermes itself), we surface a single deterministic line about
|
||||
Python tooling state so models don't have to discover it by hitting
|
||||
walls. Common failure modes this addresses:
|
||||
|
||||
* Hermes ships under one Python (e.g. 3.11 in a bundled venv) while the
|
||||
user's login shell has a different one (e.g. 3.12 system). ``pip``
|
||||
resolved from PATH may not match ``python3 -m pip``.
|
||||
* The bundled-venv Python has no pip module installed → ``python3 -m
|
||||
pip`` returns ``No module named pip``.
|
||||
* The system Python is PEP-668 externally-managed → naive
|
||||
``pip install`` fails with ``error: externally-managed-environment``.
|
||||
|
||||
The probe is cheap (a handful of subprocess calls, ~50ms total),
|
||||
cached for the lifetime of the process, and emits **at most one
|
||||
short line** when something non-default is detected. When the
|
||||
environment looks normal (python3+pip both present and matched, no
|
||||
PEP 668), it emits nothing — no token cost.
|
||||
|
||||
Remote terminal backends (docker, modal, ssh, …) are skipped: the
|
||||
host's Python state is irrelevant when tools run inside a sandbox.
|
||||
The sandbox has its own existing probe (``_probe_remote_backend``)
|
||||
in ``agent/prompt_builder.py``.
|
||||
|
||||
Toggle via ``agent.environment_probe`` in config.yaml (default True).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import threading
|
||||
from typing import Optional
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Module-level cache. The probe result is deterministic for the
|
||||
# lifetime of the process — Python install state doesn't change
|
||||
# mid-session in any way that would matter for the system prompt.
|
||||
_CACHE_LOCK = threading.Lock()
|
||||
_CACHED_LINE: Optional[str] = None # None = not probed yet; "" = probed, nothing to say.
|
||||
|
||||
# Remote backends — keep in sync with agent/prompt_builder.py:_REMOTE_TERMINAL_BACKENDS.
|
||||
# Duplicated rather than imported to avoid a circular import (prompt_builder
|
||||
# imports nothing from tools).
|
||||
_REMOTE_BACKENDS = frozenset({
|
||||
"docker", "singularity", "modal", "daytona", "ssh", "managed_modal",
|
||||
})
|
||||
|
||||
|
||||
def _run(cmd: list[str], timeout: float = 3.0) -> tuple[int, str, str]:
|
||||
"""Run a short subprocess. Returns (returncode, stdout, stderr).
|
||||
|
||||
Failures (binary missing, timeout, OSError) return (-1, "", "<reason>").
|
||||
"""
|
||||
try:
|
||||
result = subprocess.run(
|
||||
cmd,
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=timeout,
|
||||
check=False,
|
||||
)
|
||||
return result.returncode, (result.stdout or "").strip(), (result.stderr or "").strip()
|
||||
except FileNotFoundError:
|
||||
return -1, "", "not found"
|
||||
except subprocess.TimeoutExpired:
|
||||
return -1, "", "timeout"
|
||||
except OSError as exc:
|
||||
return -1, "", f"oserror: {exc}"
|
||||
|
||||
|
||||
def _python_version_of(binary: str) -> Optional[str]:
|
||||
"""Return a short version string like ``3.12.4`` for ``binary``, or None."""
|
||||
if not shutil.which(binary):
|
||||
return None
|
||||
rc, out, err = _run([binary, "-c", "import sys; print(f'{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}')"])
|
||||
if rc == 0 and out:
|
||||
return out
|
||||
return None
|
||||
|
||||
|
||||
def _has_pip_module(binary: str) -> bool:
|
||||
"""True if ``<binary> -m pip --version`` succeeds."""
|
||||
if not shutil.which(binary):
|
||||
return False
|
||||
rc, _out, _err = _run([binary, "-m", "pip", "--version"])
|
||||
return rc == 0
|
||||
|
||||
|
||||
def _detect_pep668(binary: str) -> bool:
|
||||
"""True when ``<binary>``'s install location is PEP-668 externally-managed.
|
||||
|
||||
Looks for ``EXTERNALLY-MANAGED`` next to the stdlib (the marker file
|
||||
Debian/Ubuntu drop in to gate naive ``pip install``).
|
||||
"""
|
||||
if not shutil.which(binary):
|
||||
return False
|
||||
code = (
|
||||
"import sys, os;"
|
||||
"stdlib = os.path.dirname(os.__file__);"
|
||||
"marker = os.path.join(stdlib, 'EXTERNALLY-MANAGED');"
|
||||
"print('yes' if os.path.exists(marker) else 'no')"
|
||||
)
|
||||
rc, out, _err = _run([binary, "-c", code])
|
||||
return rc == 0 and out.strip() == "yes"
|
||||
|
||||
|
||||
def _pip_python_version() -> Optional[str]:
|
||||
"""If ``pip`` is on PATH, return the Python version it's bound to.
|
||||
|
||||
``pip --version`` output looks like::
|
||||
|
||||
pip 24.0 from /usr/lib/python3/dist-packages/pip (python 3.12)
|
||||
|
||||
Returns the parenthesised version (e.g. ``"3.12"``) or None.
|
||||
"""
|
||||
if not shutil.which("pip"):
|
||||
return None
|
||||
rc, out, _err = _run(["pip", "--version"])
|
||||
if rc != 0 or not out:
|
||||
return None
|
||||
# Parse trailing "(python X.Y)".
|
||||
if "(python " in out and out.endswith(")"):
|
||||
try:
|
||||
tail = out.rsplit("(python ", 1)[1]
|
||||
return tail[:-1].strip()
|
||||
except (IndexError, AttributeError):
|
||||
return None
|
||||
return None
|
||||
|
||||
|
||||
def _build_probe_line() -> str:
|
||||
"""Build the one-liner. Returns "" when nothing notable is detected.
|
||||
|
||||
Emit only when SOMETHING is off — the goal is to save the model from
|
||||
hitting an avoidable wall, not to narrate a healthy environment.
|
||||
"""
|
||||
# Bail out if a remote terminal backend is configured; the host's
|
||||
# Python state isn't where the agent's tools run.
|
||||
backend = (os.getenv("TERMINAL_ENV") or "local").strip().lower()
|
||||
if backend in _REMOTE_BACKENDS:
|
||||
return ""
|
||||
|
||||
py3_ver = _python_version_of("python3")
|
||||
py_ver = _python_version_of("python") # for systems with a `python` alias
|
||||
py3_has_pip = _has_pip_module("python3") if py3_ver else False
|
||||
pip_bound_to = _pip_python_version()
|
||||
py3_pep668 = _detect_pep668("python3") if py3_ver else False
|
||||
has_uv = shutil.which("uv") is not None
|
||||
|
||||
# If python3 exists, has pip, has uv (or no PEP 668), and there's no
|
||||
# version mismatch between `pip` and `python3` → environment is
|
||||
# clean enough to stay silent. The model can discover details by
|
||||
# running commands if it cares.
|
||||
mismatch = bool(pip_bound_to and py3_ver and not py3_ver.startswith(pip_bound_to))
|
||||
silent_conditions = (
|
||||
py3_ver is not None
|
||||
and py3_has_pip
|
||||
and not mismatch
|
||||
and (not py3_pep668 or has_uv)
|
||||
)
|
||||
if silent_conditions:
|
||||
return ""
|
||||
|
||||
# Build a compact factual summary. Keep it ONE line so it doesn't
|
||||
# dominate the prompt; the model is good at parsing dense info.
|
||||
bits: list[str] = []
|
||||
if py3_ver:
|
||||
py3_bit = f"python3={py3_ver}"
|
||||
if not py3_has_pip:
|
||||
py3_bit += " (no pip module)"
|
||||
bits.append(py3_bit)
|
||||
else:
|
||||
bits.append("python3=missing")
|
||||
|
||||
if py_ver and py_ver != py3_ver:
|
||||
bits.append(f"python={py_ver}")
|
||||
elif not py_ver and py3_ver:
|
||||
# Common on Debian/Ubuntu — call it out so the model doesn't
|
||||
# type `python` and hit "command not found".
|
||||
bits.append("python=missing (use python3)")
|
||||
|
||||
if pip_bound_to:
|
||||
if mismatch:
|
||||
bits.append(f"pip→python{pip_bound_to} (mismatch)")
|
||||
elif not py3_has_pip:
|
||||
# pip exists but `python3 -m pip` doesn't — the script
|
||||
# works but the module path doesn't.
|
||||
bits.append(f"pip→python{pip_bound_to}")
|
||||
elif py3_has_pip:
|
||||
# `pip` not on PATH but `python3 -m pip` works.
|
||||
pass
|
||||
else:
|
||||
bits.append("pip=missing")
|
||||
|
||||
if py3_pep668:
|
||||
bits.append("PEP 668=yes (use venv or uv)")
|
||||
|
||||
if has_uv:
|
||||
bits.append("uv=installed")
|
||||
|
||||
if not bits:
|
||||
return ""
|
||||
|
||||
return "Python toolchain: " + ", ".join(bits) + "."
|
||||
|
||||
|
||||
def get_environment_probe_line(*, force_refresh: bool = False) -> str:
|
||||
"""Return the cached probe line (building it on first call).
|
||||
|
||||
Returns "" when the environment is clean — the system prompt
|
||||
assembler should drop the section in that case rather than
|
||||
emit an empty heading.
|
||||
|
||||
``force_refresh`` is for tests; real callers should never need it.
|
||||
"""
|
||||
global _CACHED_LINE
|
||||
if force_refresh:
|
||||
with _CACHE_LOCK:
|
||||
_CACHED_LINE = None
|
||||
|
||||
if _CACHED_LINE is not None:
|
||||
return _CACHED_LINE
|
||||
|
||||
with _CACHE_LOCK:
|
||||
if _CACHED_LINE is not None: # raced
|
||||
return _CACHED_LINE
|
||||
try:
|
||||
line = _build_probe_line()
|
||||
except Exception as exc: # never let probe failure block prompt build
|
||||
logger.debug("env_probe failed: %s", exc)
|
||||
line = ""
|
||||
_CACHED_LINE = line
|
||||
return line
|
||||
|
||||
|
||||
def _reset_cache_for_tests() -> None:
|
||||
"""Test helper — clear the cache between probe scenarios."""
|
||||
global _CACHED_LINE
|
||||
with _CACHE_LOCK:
|
||||
_CACHED_LINE = None
|
||||
Loading…
Add table
Add a link
Reference in a new issue