mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
feat(agent): per-turn file-mutation verifier footer (#24498)
Detect when write_file / patch calls fail during a turn and are never superseded by a successful write to the same path. When the final text response is delivered, append an advisory footer listing the files that did NOT change — so models that over-claim 'patched 5 files' after 4 silent failures can't hide the lie. Catches the failure mode reported in Ben Eng's llm-wiki session: grok-4.1-fast issued batches of parallel patches, half failed with 'Could not find old_string', and the agent summarised the turn claiming every file was edited. The user had to manually run 'git status' each turn to catch it. The verifier is a pure post-hoc check on tool results — no new LLM calls, no synthetic messages injected into history (prompt cache preserved), no changes to tool argument dispatch. Per-turn state is keyed by path; a later successful write to the same path clears the failure entry so single-file retry recovery is not flagged. Wired into both _execute_tool_calls_concurrent and _execute_tool_calls_sequential, so batched parallel patches and one-at- a-time edits are both covered. Footer emission happens after the agent loop exits, before transform_llm_output / post_llm_call plugin hooks run, so plugins still see (and can modify) the augmented text. Config: display.file_mutation_verifier (bool, default true) + HERMES_FILE_MUTATION_VERIFIER env override. 31 unit tests in tests/run_agent/test_file_mutation_verifier.py cover target extraction (write_file, patch-replace, patch-v4a single and multi-file), error-preview extraction (JSON .error field and plain string), per-turn state transitions (first-error-wins on repeated failure, success supersedes failure), footer rendering (truncation at 10 entries, user-actionable hint), and env/config precedence. Companion docs updated: user-guide/configuration.md + reference/environment-variables.md.
This commit is contained in:
parent
dd0923bb89
commit
c594a23047
5 changed files with 552 additions and 0 deletions
219
run_agent.py
219
run_agent.py
|
|
@ -347,6 +347,10 @@ _PARALLEL_SAFE_TOOLS = frozenset({
|
|||
# File tools can run concurrently when they target independent paths.
|
||||
_PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"})
|
||||
|
||||
# Tools that mutate files on disk. Used by the per-turn verifier that
|
||||
# surfaces silently-failed file edits so the model can't over-claim success.
|
||||
_FILE_MUTATING_TOOLS = frozenset({"write_file", "patch"})
|
||||
|
||||
# Maximum number of concurrent worker threads for parallel tool execution.
|
||||
_MAX_TOOL_WORKERS = 8
|
||||
|
||||
|
|
@ -524,6 +528,68 @@ def _append_subdir_hint_to_multimodal(value: Dict[str, Any], hint: str) -> None:
|
|||
value["text_summary"] = value["text_summary"] + hint
|
||||
|
||||
|
||||
def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List[str]:
|
||||
"""Return the file paths a ``write_file`` or ``patch`` call is targeting.
|
||||
|
||||
For ``write_file`` and ``patch`` in replace mode this is just ``args["path"]``.
|
||||
For ``patch`` in V4A patch mode we parse the patch content for
|
||||
``*** Update File:`` / ``*** Add File:`` / ``*** Delete File:`` headers so
|
||||
the verifier can track each file in a multi-file patch separately.
|
||||
"""
|
||||
if tool_name not in _FILE_MUTATING_TOOLS:
|
||||
return []
|
||||
if tool_name == "write_file":
|
||||
p = args.get("path")
|
||||
return [str(p)] if p else []
|
||||
# tool_name == "patch"
|
||||
mode = args.get("mode") or "replace"
|
||||
if mode == "replace":
|
||||
p = args.get("path")
|
||||
return [str(p)] if p else []
|
||||
if mode == "patch":
|
||||
body = args.get("patch") or ""
|
||||
if not isinstance(body, str) or not body:
|
||||
return []
|
||||
import re as _re
|
||||
paths: List[str] = []
|
||||
for _m in _re.finditer(
|
||||
r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$',
|
||||
body,
|
||||
_re.MULTILINE,
|
||||
):
|
||||
p = _m.group(1).strip()
|
||||
if p:
|
||||
paths.append(p)
|
||||
return paths
|
||||
return []
|
||||
|
||||
|
||||
def _extract_error_preview(result: Any, max_len: int = 180) -> str:
|
||||
"""Pull a one-line error summary out of a tool result for footer display."""
|
||||
text = _multimodal_text_summary(result) if result is not None else ""
|
||||
if not isinstance(text, str):
|
||||
try:
|
||||
text = str(text)
|
||||
except Exception:
|
||||
return ""
|
||||
# Try to parse JSON and pull the ``error`` field — tool handlers return
|
||||
# ``{"success": false, "error": "..."}``; raw string wins if parse fails.
|
||||
stripped = text.strip()
|
||||
if stripped.startswith("{"):
|
||||
try:
|
||||
import json as _json
|
||||
data = _json.loads(stripped)
|
||||
if isinstance(data, dict) and isinstance(data.get("error"), str):
|
||||
text = data["error"]
|
||||
except Exception:
|
||||
pass
|
||||
# Collapse whitespace, trim to max_len.
|
||||
text = " ".join(text.split())
|
||||
if len(text) > max_len:
|
||||
text = text[: max_len - 1] + "…"
|
||||
return text
|
||||
|
||||
|
||||
def _trajectory_normalize_msg(msg: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Strip image blobs from a message for trajectory saving.
|
||||
|
||||
|
|
@ -5346,6 +5412,103 @@ class AIAgent:
|
|||
self._pending_steer = None
|
||||
return text
|
||||
|
||||
def _record_file_mutation_result(
|
||||
self,
|
||||
tool_name: str,
|
||||
args: Dict[str, Any],
|
||||
result: Any,
|
||||
is_error: bool,
|
||||
) -> None:
|
||||
"""Record a ``write_file`` / ``patch`` outcome for the turn-end verifier.
|
||||
|
||||
On failure, store ``{path: {error_preview, tool}}`` entries. On
|
||||
success, remove any prior failure entries for the same paths (the
|
||||
model recovered within the turn). Silently no-ops if the per-turn
|
||||
state dict hasn't been initialised yet (e.g. a tool dispatched
|
||||
outside ``run_conversation``).
|
||||
"""
|
||||
if tool_name not in _FILE_MUTATING_TOOLS:
|
||||
return
|
||||
state = getattr(self, "_turn_failed_file_mutations", None)
|
||||
if state is None:
|
||||
return
|
||||
targets = _extract_file_mutation_targets(tool_name, args)
|
||||
if not targets:
|
||||
return
|
||||
if is_error:
|
||||
preview = _extract_error_preview(result)
|
||||
for path in targets:
|
||||
# Keep the FIRST error we saw for a given path unless we
|
||||
# later see success. A repeated failure with a different
|
||||
# message shouldn't silently overwrite the original.
|
||||
if path not in state:
|
||||
state[path] = {
|
||||
"tool": tool_name,
|
||||
"error_preview": preview,
|
||||
}
|
||||
else:
|
||||
for path in targets:
|
||||
state.pop(path, None)
|
||||
|
||||
def _file_mutation_verifier_enabled(self) -> bool:
|
||||
"""Check whether the per-turn file-mutation verifier footer is on.
|
||||
|
||||
Config path: ``display.file_mutation_verifier`` (bool, default True).
|
||||
``HERMES_FILE_MUTATION_VERIFIER`` env var overrides config. Exposed
|
||||
as a method so tests can patch a single seam without reaching into
|
||||
the private ``_turn_failed_file_mutations`` state dict.
|
||||
"""
|
||||
try:
|
||||
import os as _os
|
||||
env = _os.environ.get("HERMES_FILE_MUTATION_VERIFIER")
|
||||
if env is not None:
|
||||
return env.strip().lower() not in ("0", "false", "no", "off")
|
||||
# Read from the persisted config.yaml so gateway and CLI share
|
||||
# the same setting. Import lazily to avoid a startup-time cycle.
|
||||
try:
|
||||
from hermes_cli.config import load_config as _load_config
|
||||
_cfg = _load_config() or {}
|
||||
except Exception:
|
||||
_cfg = {}
|
||||
_display = _cfg.get("display") if isinstance(_cfg, dict) else None
|
||||
if isinstance(_display, dict) and "file_mutation_verifier" in _display:
|
||||
return bool(_display.get("file_mutation_verifier"))
|
||||
except Exception:
|
||||
pass
|
||||
return True # safe default: verifier on
|
||||
|
||||
@staticmethod
|
||||
def _format_file_mutation_failure_footer(failed: Dict[str, Dict[str, Any]]) -> str:
|
||||
"""Render the per-turn failed-mutation dict as a user-facing footer.
|
||||
|
||||
Displays up to 10 paths with their first error preview, then a
|
||||
count of any additional failures. Returns an empty string when
|
||||
the dict is empty so callers can concatenate unconditionally.
|
||||
"""
|
||||
if not failed:
|
||||
return ""
|
||||
lines = [
|
||||
"⚠️ File-mutation verifier: "
|
||||
f"{len(failed)} file(s) were NOT modified this turn despite any "
|
||||
"wording above that may suggest otherwise. Run `git status` or "
|
||||
"`read_file` to confirm."
|
||||
]
|
||||
shown = 0
|
||||
for path, info in failed.items():
|
||||
if shown >= 10:
|
||||
break
|
||||
preview = (info.get("error_preview") or "").strip()
|
||||
tool = info.get("tool") or "patch"
|
||||
if preview:
|
||||
lines.append(f" • {path} — [{tool}] {preview}")
|
||||
else:
|
||||
lines.append(f" • {path} — [{tool}] failed")
|
||||
shown += 1
|
||||
remaining = len(failed) - shown
|
||||
if remaining > 0:
|
||||
lines.append(f" • … and {remaining} more")
|
||||
return "\n".join(lines)
|
||||
|
||||
def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None:
|
||||
"""Append any pending /steer text to the last tool result in this turn.
|
||||
|
||||
|
|
@ -10872,6 +11035,17 @@ class AIAgent:
|
|||
result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text
|
||||
logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview)
|
||||
|
||||
# Track file-mutation outcome for the turn-end verifier.
|
||||
# `blocked` calls never actually ran — don't let a guardrail
|
||||
# block count as either a failure or a success.
|
||||
if not blocked:
|
||||
try:
|
||||
self._record_file_mutation_result(
|
||||
function_name, function_args, function_result, is_error,
|
||||
)
|
||||
except Exception as _ver_err:
|
||||
logging.debug("file-mutation verifier record failed: %s", _ver_err)
|
||||
|
||||
if not blocked and self.tool_progress_callback:
|
||||
try:
|
||||
self.tool_progress_callback(
|
||||
|
|
@ -11298,6 +11472,18 @@ class AIAgent:
|
|||
else:
|
||||
logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len)
|
||||
|
||||
# Track file-mutation outcome for the turn-end verifier. See
|
||||
# the concurrent path for the rationale; both paths must feed
|
||||
# the same state so the footer reflects every tool call in the
|
||||
# turn, not just the parallel ones.
|
||||
if not _execution_blocked:
|
||||
try:
|
||||
self._record_file_mutation_result(
|
||||
function_name, function_args, function_result, _is_error_result,
|
||||
)
|
||||
except Exception as _ver_err:
|
||||
logging.debug("file-mutation verifier record failed: %s", _ver_err)
|
||||
|
||||
if not _execution_blocked and self.tool_progress_callback:
|
||||
try:
|
||||
self.tool_progress_callback(
|
||||
|
|
@ -11995,6 +12181,14 @@ class AIAgent:
|
|||
truncated_response_prefix = ""
|
||||
compression_attempts = 0
|
||||
_turn_exit_reason = "unknown" # Diagnostic: why the loop ended
|
||||
|
||||
# Per-turn file-mutation verifier state. Keyed by resolved path;
|
||||
# each failed ``write_file`` / ``patch`` call records the error
|
||||
# preview. Later successful writes to the same path remove the
|
||||
# entry (the model recovered). At end-of-turn, any entries still
|
||||
# present are surfaced in an advisory footer so the model cannot
|
||||
# over-claim success while the file is actually unchanged on disk.
|
||||
self._turn_failed_file_mutations: Dict[str, Dict[str, Any]] = {}
|
||||
|
||||
# Record the execution thread so interrupt()/clear_interrupt() can
|
||||
# scope the tool-level interrupt signal to THIS agent's thread only.
|
||||
|
|
@ -15310,6 +15504,31 @@ class AIAgent:
|
|||
else:
|
||||
logger.info(_diag_msg, *_diag_args)
|
||||
|
||||
# File-mutation verifier footer.
|
||||
# If one or more ``write_file`` / ``patch`` calls failed during this
|
||||
# turn and were never superseded by a successful write to the same
|
||||
# path, append an advisory footer to the assistant response. This
|
||||
# catches the specific case — reported by Ben Eng (#15524-adjacent)
|
||||
# — where a model issues a batch of parallel patches, half of them
|
||||
# fail with "Could not find old_string", and the model summarises
|
||||
# the turn claiming every file was edited. The user then has to
|
||||
# manually run ``git status`` to catch the lie. With this footer
|
||||
# the truth is surfaced on every turn, so over-claiming is
|
||||
# structurally impossible past the model.
|
||||
#
|
||||
# Gate: only applied when a real text response exists for this
|
||||
# turn and the user didn't interrupt. Empty/interrupted turns
|
||||
# already have other surface text that shouldn't be augmented.
|
||||
if final_response and not interrupted:
|
||||
try:
|
||||
_failed = getattr(self, "_turn_failed_file_mutations", None) or {}
|
||||
if _failed and self._file_mutation_verifier_enabled():
|
||||
footer = self._format_file_mutation_failure_footer(_failed)
|
||||
if footer:
|
||||
final_response = final_response.rstrip() + "\n\n" + footer
|
||||
except Exception as _ver_err:
|
||||
logger.debug("file-mutation verifier footer failed: %s", _ver_err)
|
||||
|
||||
# Plugin hook: transform_llm_output
|
||||
# Fired once per turn after the tool-calling loop completes.
|
||||
# Plugins can transform the LLM's output text before it's returned.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue