From c594a2304734b708e7ebc68d4fe2eff1bb57abbc Mon Sep 17 00:00:00 2001 From: Teknium <127238744+teknium1@users.noreply.github.com> Date: Tue, 12 May 2026 11:54:13 -0700 Subject: [PATCH] feat(agent): per-turn file-mutation verifier footer (#24498) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Detect when write_file / patch calls fail during a turn and are never superseded by a successful write to the same path. When the final text response is delivered, append an advisory footer listing the files that did NOT change — so models that over-claim 'patched 5 files' after 4 silent failures can't hide the lie. Catches the failure mode reported in Ben Eng's llm-wiki session: grok-4.1-fast issued batches of parallel patches, half failed with 'Could not find old_string', and the agent summarised the turn claiming every file was edited. The user had to manually run 'git status' each turn to catch it. The verifier is a pure post-hoc check on tool results — no new LLM calls, no synthetic messages injected into history (prompt cache preserved), no changes to tool argument dispatch. Per-turn state is keyed by path; a later successful write to the same path clears the failure entry so single-file retry recovery is not flagged. Wired into both _execute_tool_calls_concurrent and _execute_tool_calls_sequential, so batched parallel patches and one-at- a-time edits are both covered. Footer emission happens after the agent loop exits, before transform_llm_output / post_llm_call plugin hooks run, so plugins still see (and can modify) the augmented text. Config: display.file_mutation_verifier (bool, default true) + HERMES_FILE_MUTATION_VERIFIER env override. 
31 unit tests in tests/run_agent/test_file_mutation_verifier.py cover target extraction (write_file, patch-replace, patch-v4a single and multi-file), error-preview extraction (JSON .error field and plain string), per-turn state transitions (first-error-wins on repeated failure, success supersedes failure), footer rendering (truncation at 10 entries, user-actionable hint), and env/config precedence. Companion docs updated: user-guide/configuration.md + reference/environment-variables.md. --- hermes_cli/config.py | 8 + run_agent.py | 219 +++++++++++++ .../run_agent/test_file_mutation_verifier.py | 308 ++++++++++++++++++ .../docs/reference/environment-variables.md | 1 + website/docs/user-guide/configuration.md | 16 + 5 files changed, 552 insertions(+) create mode 100644 tests/run_agent/test_file_mutation_verifier.py diff --git a/hermes_cli/config.py b/hermes_cli/config.py index d7585dc3010..c7946872bf2 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -917,6 +917,14 @@ DEFAULT_CONFIG = { "persistent_output": True, "persistent_output_max_lines": 200, "inline_diffs": True, # Show inline diff previews for write actions (write_file, patch, skill_manage) + # File-mutation verifier footer. When true (default), the agent + # appends an advisory footer to its final response whenever a + # write_file / patch call failed during the turn and was never + # superseded by a successful write to the same path. This catches + # the "batch of parallel patches, half fail, model claims success" + # class of over-claim that otherwise forces users to run + # `git status` to verify edits landed. Set false to suppress. 
+ "file_mutation_verifier": True, "show_cost": False, # Show $ cost in the status bar (off by default) "skin": "default", # UI language for static user-facing messages (approval prompts, a diff --git a/run_agent.py b/run_agent.py index 973f0d95d72..a8b071c8724 100644 --- a/run_agent.py +++ b/run_agent.py @@ -347,6 +347,10 @@ _PARALLEL_SAFE_TOOLS = frozenset({ # File tools can run concurrently when they target independent paths. _PATH_SCOPED_TOOLS = frozenset({"read_file", "write_file", "patch"}) +# Tools that mutate files on disk. Used by the per-turn verifier that +# surfaces silently-failed file edits so the model can't over-claim success. +_FILE_MUTATING_TOOLS = frozenset({"write_file", "patch"}) + # Maximum number of concurrent worker threads for parallel tool execution. _MAX_TOOL_WORKERS = 8 @@ -524,6 +528,68 @@ def _append_subdir_hint_to_multimodal(value: Dict[str, Any], hint: str) -> None: value["text_summary"] = value["text_summary"] + hint +def _extract_file_mutation_targets(tool_name: str, args: Dict[str, Any]) -> List[str]: + """Return the file paths a ``write_file`` or ``patch`` call is targeting. + + For ``write_file`` and ``patch`` in replace mode this is just ``args["path"]``. + For ``patch`` in V4A patch mode we parse the patch content for + ``*** Update File:`` / ``*** Add File:`` / ``*** Delete File:`` headers so + the verifier can track each file in a multi-file patch separately. 
+ """ + if tool_name not in _FILE_MUTATING_TOOLS: + return [] + if tool_name == "write_file": + p = args.get("path") + return [str(p)] if p else [] + # tool_name == "patch" + mode = args.get("mode") or "replace" + if mode == "replace": + p = args.get("path") + return [str(p)] if p else [] + if mode == "patch": + body = args.get("patch") or "" + if not isinstance(body, str) or not body: + return [] + import re as _re + paths: List[str] = [] + for _m in _re.finditer( + r'^\*\*\*\s+(?:Update|Add|Delete)\s+File:\s*(.+)$', + body, + _re.MULTILINE, + ): + p = _m.group(1).strip() + if p: + paths.append(p) + return paths + return [] + + +def _extract_error_preview(result: Any, max_len: int = 180) -> str: + """Pull a one-line error summary out of a tool result for footer display.""" + text = _multimodal_text_summary(result) if result is not None else "" + if not isinstance(text, str): + try: + text = str(text) + except Exception: + return "" + # Try to parse JSON and pull the ``error`` field — tool handlers return + # ``{"success": false, "error": "..."}``; raw string wins if parse fails. + stripped = text.strip() + if stripped.startswith("{"): + try: + import json as _json + data = _json.loads(stripped) + if isinstance(data, dict) and isinstance(data.get("error"), str): + text = data["error"] + except Exception: + pass + # Collapse whitespace, trim to max_len. + text = " ".join(text.split()) + if len(text) > max_len: + text = text[: max_len - 1] + "…" + return text + + def _trajectory_normalize_msg(msg: Dict[str, Any]) -> Dict[str, Any]: """Strip image blobs from a message for trajectory saving. @@ -5346,6 +5412,103 @@ class AIAgent: self._pending_steer = None return text + def _record_file_mutation_result( + self, + tool_name: str, + args: Dict[str, Any], + result: Any, + is_error: bool, + ) -> None: + """Record a ``write_file`` / ``patch`` outcome for the turn-end verifier. + + On failure, store ``{path: {error_preview, tool}}`` entries. 
On + success, remove any prior failure entries for the same paths (the + model recovered within the turn). Silently no-ops if the per-turn + state dict hasn't been initialised yet (e.g. a tool dispatched + outside ``run_conversation``). + """ + if tool_name not in _FILE_MUTATING_TOOLS: + return + state = getattr(self, "_turn_failed_file_mutations", None) + if state is None: + return + targets = _extract_file_mutation_targets(tool_name, args) + if not targets: + return + if is_error: + preview = _extract_error_preview(result) + for path in targets: + # Keep the FIRST error we saw for a given path unless we + # later see success. A repeated failure with a different + # message shouldn't silently overwrite the original. + if path not in state: + state[path] = { + "tool": tool_name, + "error_preview": preview, + } + else: + for path in targets: + state.pop(path, None) + + def _file_mutation_verifier_enabled(self) -> bool: + """Check whether the per-turn file-mutation verifier footer is on. + + Config path: ``display.file_mutation_verifier`` (bool, default True). + ``HERMES_FILE_MUTATION_VERIFIER`` env var overrides config. Exposed + as a method so tests can patch a single seam without reaching into + the private ``_turn_failed_file_mutations`` state dict. + """ + try: + import os as _os + env = _os.environ.get("HERMES_FILE_MUTATION_VERIFIER") + if env is not None: + return env.strip().lower() not in ("0", "false", "no", "off") + # Read from the persisted config.yaml so gateway and CLI share + # the same setting. Import lazily to avoid a startup-time cycle. 
+ try: + from hermes_cli.config import load_config as _load_config + _cfg = _load_config() or {} + except Exception: + _cfg = {} + _display = _cfg.get("display") if isinstance(_cfg, dict) else None + if isinstance(_display, dict) and "file_mutation_verifier" in _display: + return bool(_display.get("file_mutation_verifier")) + except Exception: + pass + return True # safe default: verifier on + + @staticmethod + def _format_file_mutation_failure_footer(failed: Dict[str, Dict[str, Any]]) -> str: + """Render the per-turn failed-mutation dict as a user-facing footer. + + Displays up to 10 paths with their first error preview, then a + count of any additional failures. Returns an empty string when + the dict is empty so callers can concatenate unconditionally. + """ + if not failed: + return "" + lines = [ + "⚠️ File-mutation verifier: " + f"{len(failed)} file(s) were NOT modified this turn despite any " + "wording above that may suggest otherwise. Run `git status` or " + "`read_file` to confirm." + ] + shown = 0 + for path, info in failed.items(): + if shown >= 10: + break + preview = (info.get("error_preview") or "").strip() + tool = info.get("tool") or "patch" + if preview: + lines.append(f" • {path} — [{tool}] {preview}") + else: + lines.append(f" • {path} — [{tool}] failed") + shown += 1 + remaining = len(failed) - shown + if remaining > 0: + lines.append(f" • … and {remaining} more") + return "\n".join(lines) + def _apply_pending_steer_to_tool_results(self, messages: list, num_tool_msgs: int) -> None: """Append any pending /steer text to the last tool result in this turn. @@ -10872,6 +11035,17 @@ class AIAgent: result_preview = _err_text[:200] if len(_err_text) > 200 else _err_text logger.warning("Tool %s returned error (%.2fs): %s", function_name, tool_duration, result_preview) + # Track file-mutation outcome for the turn-end verifier. + # `blocked` calls never actually ran — don't let a guardrail + # block count as either a failure or a success. 
+ if not blocked: + try: + self._record_file_mutation_result( + function_name, function_args, function_result, is_error, + ) + except Exception as _ver_err: + logging.debug("file-mutation verifier record failed: %s", _ver_err) + if not blocked and self.tool_progress_callback: try: self.tool_progress_callback( @@ -11298,6 +11472,18 @@ class AIAgent: else: logger.info("tool %s completed (%.2fs, %d chars)", function_name, tool_duration, _result_len) + # Track file-mutation outcome for the turn-end verifier. See + # the concurrent path for the rationale; both paths must feed + # the same state so the footer reflects every tool call in the + # turn, not just the parallel ones. + if not _execution_blocked: + try: + self._record_file_mutation_result( + function_name, function_args, function_result, _is_error_result, + ) + except Exception as _ver_err: + logging.debug("file-mutation verifier record failed: %s", _ver_err) + if not _execution_blocked and self.tool_progress_callback: try: self.tool_progress_callback( @@ -11995,6 +12181,14 @@ class AIAgent: truncated_response_prefix = "" compression_attempts = 0 _turn_exit_reason = "unknown" # Diagnostic: why the loop ended + + # Per-turn file-mutation verifier state. Keyed by resolved path; + # each failed ``write_file`` / ``patch`` call records the error + # preview. Later successful writes to the same path remove the + # entry (the model recovered). At end-of-turn, any entries still + # present are surfaced in an advisory footer so the model cannot + # over-claim success while the file is actually unchanged on disk. + self._turn_failed_file_mutations: Dict[str, Dict[str, Any]] = {} # Record the execution thread so interrupt()/clear_interrupt() can # scope the tool-level interrupt signal to THIS agent's thread only. @@ -15310,6 +15504,31 @@ class AIAgent: else: logger.info(_diag_msg, *_diag_args) + # File-mutation verifier footer. 
+ # If one or more ``write_file`` / ``patch`` calls failed during this + # turn and were never superseded by a successful write to the same + # path, append an advisory footer to the assistant response. This + # catches the specific case — reported by Ben Eng (#15524-adjacent) + # — where a model issues a batch of parallel patches, half of them + # fail with "Could not find old_string", and the model summarises + # the turn claiming every file was edited. The user then has to + # manually run ``git status`` to catch the lie. With this footer + # the truth is surfaced on every turn, so over-claiming is + # structurally impossible past the model. + # + # Gate: only applied when a real text response exists for this + # turn and the user didn't interrupt. Empty/interrupted turns + # already have other surface text that shouldn't be augmented. + if final_response and not interrupted: + try: + _failed = getattr(self, "_turn_failed_file_mutations", None) or {} + if _failed and self._file_mutation_verifier_enabled(): + footer = self._format_file_mutation_failure_footer(_failed) + if footer: + final_response = final_response.rstrip() + "\n\n" + footer + except Exception as _ver_err: + logger.debug("file-mutation verifier footer failed: %s", _ver_err) + # Plugin hook: transform_llm_output # Fired once per turn after the tool-calling loop completes. # Plugins can transform the LLM's output text before it's returned. diff --git a/tests/run_agent/test_file_mutation_verifier.py b/tests/run_agent/test_file_mutation_verifier.py new file mode 100644 index 00000000000..fca002d2314 --- /dev/null +++ b/tests/run_agent/test_file_mutation_verifier.py @@ -0,0 +1,308 @@ +"""Tests for the per-turn file-mutation verifier footer. + +Covers the three moving pieces: + +1. ``_extract_file_mutation_targets`` — pulls file paths from write_file / + patch (replace + V4A) tool-call argument dicts. +2. 
``AIAgent._record_file_mutation_result`` — builds the per-turn state + dict, removing entries when a later success supersedes an earlier + failure for the same path. +3. ``AIAgent._format_file_mutation_failure_footer`` — renders the dict + as a user-visible advisory. + +Regression target: the "Ben Eng llm-wiki" session where grok-4.1-fast +batched parallel patches, half failed, and the model summarised the +turn claiming every file was edited. This verifier makes over-claiming +structurally impossible past the model: the user always sees the real +list of files that did NOT change. +""" + +from __future__ import annotations + +import json + +import pytest + +from run_agent import ( + AIAgent, + _FILE_MUTATING_TOOLS, + _extract_error_preview, + _extract_file_mutation_targets, +) + + +# --------------------------------------------------------------------------- +# _extract_file_mutation_targets +# --------------------------------------------------------------------------- + + +class TestExtractFileMutationTargets: + def test_non_mutating_tool_returns_empty(self): + assert _extract_file_mutation_targets("read_file", {"path": "/x"}) == [] + assert _extract_file_mutation_targets("terminal", {"command": "ls"}) == [] + + def test_write_file_returns_single_path(self): + out = _extract_file_mutation_targets("write_file", {"path": "/tmp/a.md", "content": "x"}) + assert out == ["/tmp/a.md"] + + def test_write_file_missing_path_returns_empty(self): + assert _extract_file_mutation_targets("write_file", {"content": "x"}) == [] + + def test_patch_replace_mode_returns_path(self): + args = {"mode": "replace", "path": "/tmp/a.md", "old_string": "x", "new_string": "y"} + assert _extract_file_mutation_targets("patch", args) == ["/tmp/a.md"] + + def test_patch_default_mode_is_replace(self): + # Mode omitted — schema default is ``replace``. 
+ args = {"path": "/tmp/a.md", "old_string": "x", "new_string": "y"} + assert _extract_file_mutation_targets("patch", args) == ["/tmp/a.md"] + + def test_patch_v4a_single_file(self): + body = ( + "*** Begin Patch\n" + "*** Update File: /tmp/a.md\n" + "@@ ctx @@\n" + " line1\n" + "-bad\n" + "+good\n" + "*** End Patch\n" + ) + args = {"mode": "patch", "patch": body} + assert _extract_file_mutation_targets("patch", args) == ["/tmp/a.md"] + + def test_patch_v4a_multi_file(self): + body = ( + "*** Begin Patch\n" + "*** Update File: /tmp/a.md\n" + "@@ @@\n-a\n+b\n" + "*** Add File: /tmp/new.md\n" + "+fresh\n" + "*** Delete File: /tmp/old.md\n" + "*** End Patch\n" + ) + args = {"mode": "patch", "patch": body} + paths = _extract_file_mutation_targets("patch", args) + assert paths == ["/tmp/a.md", "/tmp/new.md", "/tmp/old.md"] + + def test_patch_v4a_missing_body_returns_empty(self): + assert _extract_file_mutation_targets("patch", {"mode": "patch"}) == [] + assert _extract_file_mutation_targets("patch", {"mode": "patch", "patch": ""}) == [] + + +# --------------------------------------------------------------------------- +# _extract_error_preview +# --------------------------------------------------------------------------- + + +class TestExtractErrorPreview: + def test_json_error_field_preferred(self): + raw = json.dumps({"success": False, "error": "Could not find old_string in /tmp/x"}) + assert _extract_error_preview(raw) == "Could not find old_string in /tmp/x" + + def test_plain_string_falls_through(self): + assert _extract_error_preview("Error executing tool: boom") == "Error executing tool: boom" + + def test_long_preview_truncated(self): + long = "x" * 500 + out = _extract_error_preview(long, max_len=50) + assert len(out) <= 50 + assert out.endswith("…") + + def test_none_returns_empty(self): + assert _extract_error_preview(None) == "" + + +# --------------------------------------------------------------------------- +# _record_file_mutation_result — state 
transitions +# --------------------------------------------------------------------------- + + +def _bare_agent() -> AIAgent: + """Skip __init__ and only attach the per-turn state dict. + + AIAgent.__init__ takes ~60 parameters and touches network, auth, and + the filesystem. For these tests we only need the two methods — + ``_record_file_mutation_result`` and ``_format_file_mutation_failure_footer``. + Using ``object.__new__`` mirrors the gateway-test pattern documented in + the agent pitfalls list. + """ + agent = object.__new__(AIAgent) + agent._turn_failed_file_mutations = {} + return agent + + +class TestRecordFileMutationResult: + def test_non_mutating_tool_ignored(self): + agent = _bare_agent() + agent._record_file_mutation_result( + "read_file", {"path": "/tmp/x"}, "{}", is_error=True, + ) + assert agent._turn_failed_file_mutations == {} + + def test_failure_recorded(self): + agent = _bare_agent() + result = json.dumps({"success": False, "error": "Could not find old_string"}) + agent._record_file_mutation_result( + "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "x", "new_string": "y"}, + result, is_error=True, + ) + state = agent._turn_failed_file_mutations + assert "/tmp/a.md" in state + assert state["/tmp/a.md"]["tool"] == "patch" + assert "Could not find old_string" in state["/tmp/a.md"]["error_preview"] + + def test_success_removes_prior_failure(self): + agent = _bare_agent() + # First attempt fails + agent._record_file_mutation_result( + "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "x", "new_string": "y"}, + json.dumps({"error": "not found"}), is_error=True, + ) + assert "/tmp/a.md" in agent._turn_failed_file_mutations + # Second attempt with corrected old_string succeeds + agent._record_file_mutation_result( + "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "real", "new_string": "fixed"}, + json.dumps({"success": True, "diff": "..."}), is_error=False, + ) + assert agent._turn_failed_file_mutations == 
{} + + def test_repeated_failure_keeps_first_error(self): + agent = _bare_agent() + agent._record_file_mutation_result( + "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "v1", "new_string": "y"}, + json.dumps({"error": "first error"}), is_error=True, + ) + agent._record_file_mutation_result( + "patch", {"mode": "replace", "path": "/tmp/a.md", "old_string": "v2", "new_string": "y"}, + json.dumps({"error": "second error"}), is_error=True, + ) + # Keep the original error — swapping to the latest would obscure + # the initial root cause. + assert "first error" in agent._turn_failed_file_mutations["/tmp/a.md"]["error_preview"] + + def test_v4a_multi_file_all_tracked(self): + agent = _bare_agent() + body = ( + "*** Begin Patch\n" + "*** Update File: /tmp/a.md\n@@ @@\n-a\n+b\n" + "*** Update File: /tmp/b.md\n@@ @@\n-a\n+b\n" + "*** End Patch\n" + ) + agent._record_file_mutation_result( + "patch", {"mode": "patch", "patch": body}, + json.dumps({"error": "parse failure"}), is_error=True, + ) + assert set(agent._turn_failed_file_mutations) == {"/tmp/a.md", "/tmp/b.md"} + + def test_no_state_dict_silent_noop(self): + """When called outside run_conversation the state dict is absent. + + The record helper must never raise — a tool dispatched from, say, + a direct ``chat()`` call should not blow up the call site just + because the verifier state hasn't been initialised. + """ + agent = object.__new__(AIAgent) # no state attached + # Should not raise + agent._record_file_mutation_result( + "patch", {"mode": "replace", "path": "/tmp/a.md"}, + json.dumps({"error": "x"}), is_error=True, + ) + + def test_missing_path_arg_recorded_nowhere(self): + agent = _bare_agent() + agent._record_file_mutation_result( + "patch", {"mode": "replace"}, # no path + json.dumps({"error": "path required"}), is_error=True, + ) + # No path → nothing to key on, state stays empty. The per-turn + # state is about file paths, not individual tool-call IDs. 
+ assert agent._turn_failed_file_mutations == {} + + +# --------------------------------------------------------------------------- +# _format_file_mutation_failure_footer +# --------------------------------------------------------------------------- + + +class TestFormatFooter: + def test_empty_returns_empty_string(self): + assert AIAgent._format_file_mutation_failure_footer({}) == "" + + def test_single_failure(self): + out = AIAgent._format_file_mutation_failure_footer( + {"/tmp/a.md": {"tool": "patch", "error_preview": "Could not find old_string"}}, + ) + assert "1 file(s) were NOT modified" in out + assert "/tmp/a.md" in out + assert "Could not find old_string" in out + assert "git status" in out # user-actionable hint + + def test_truncation_at_10_entries(self): + failed = { + f"/tmp/f{i}.md": {"tool": "patch", "error_preview": "err"} + for i in range(15) + } + out = AIAgent._format_file_mutation_failure_footer(failed) + assert "15 file(s) were NOT modified" in out + assert "… and 5 more" in out + # Ten file bullets + header + "and X more" line + lines = out.split("\n") + bullet_lines = [ln for ln in lines if ln.lstrip().startswith("•")] + assert len(bullet_lines) == 11 # 10 shown + 1 summary + + +# --------------------------------------------------------------------------- +# _file_mutation_verifier_enabled — env + config precedence +# --------------------------------------------------------------------------- + + +class TestVerifierEnabled: + def test_default_is_enabled(self, monkeypatch): + monkeypatch.delenv("HERMES_FILE_MUTATION_VERIFIER", raising=False) + agent = _bare_agent() + # With no env and no config present, safe default is True. + # load_config may surface a user config.yaml in some envs — stub it. 
+ import hermes_cli.config as _cfg_mod + monkeypatch.setattr(_cfg_mod, "load_config", lambda: {}) + assert agent._file_mutation_verifier_enabled() is True + + @pytest.mark.parametrize("value", ["0", "false", "FALSE", "no", "off"]) + def test_env_disables(self, monkeypatch, value): + monkeypatch.setenv("HERMES_FILE_MUTATION_VERIFIER", value) + agent = _bare_agent() + assert agent._file_mutation_verifier_enabled() is False + + def test_env_enables_over_config(self, monkeypatch): + monkeypatch.setenv("HERMES_FILE_MUTATION_VERIFIER", "1") + import hermes_cli.config as _cfg_mod + monkeypatch.setattr( + _cfg_mod, "load_config", + lambda: {"display": {"file_mutation_verifier": False}}, + ) + agent = _bare_agent() + assert agent._file_mutation_verifier_enabled() is True + + def test_config_disables_when_no_env(self, monkeypatch): + monkeypatch.delenv("HERMES_FILE_MUTATION_VERIFIER", raising=False) + import hermes_cli.config as _cfg_mod + monkeypatch.setattr( + _cfg_mod, "load_config", + lambda: {"display": {"file_mutation_verifier": False}}, + ) + agent = _bare_agent() + assert agent._file_mutation_verifier_enabled() is False + + +# --------------------------------------------------------------------------- +# Module-level invariants +# --------------------------------------------------------------------------- + + +def test_file_mutating_tools_set_shape(): + """write_file + patch are the only tools the verifier tracks. + + Guard rail: if someone adds a third file-mutating tool (e.g. a new + ``append_file``), they should also audit whether the verifier should + track it. This test fails loudly on unilateral additions. 
+ """ + assert _FILE_MUTATING_TOOLS == frozenset({"write_file", "patch"}) diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index 9d7208883b7..eda0c2863a7 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -490,6 +490,7 @@ Advanced per-platform knobs for throttling the outbound message batcher. Most us | `HERMES_GATEWAY_PLATFORM_CONNECT_TIMEOUT` | Per-platform connect timeout during gateway startup (seconds). | | `HERMES_GATEWAY_BUSY_INPUT_MODE` | Default gateway busy-input behavior: `queue`, `steer`, or `interrupt`. Can be overridden per chat with `/busy`. | | `HERMES_GATEWAY_BUSY_ACK_ENABLED` | Whether the gateway sends an acknowledgment message (⚡/⏳/⏩) when a user sends input while the agent is busy (default: `true`). Set to `false` to suppress these messages entirely — the input is still queued/steered/interrupts as normal, only the chat reply is silenced. Bridged from `display.busy_ack_enabled` in `config.yaml`. | +| `HERMES_FILE_MUTATION_VERIFIER` | Enable the per-turn file-mutation verifier footer (default: `true`). When enabled, Hermes appends an advisory listing any `write_file` / `patch` calls that failed during the turn and were not superseded by a successful write. Set to `0`, `false`, `no`, or `off` to suppress. Mirrors `display.file_mutation_verifier` in `config.yaml`; the env var wins when set. | | `HERMES_CRON_TIMEOUT` | Inactivity timeout for cron job agent runs in seconds (default: `600`). The agent can run indefinitely while actively calling tools or receiving stream tokens — this only triggers when idle. Set to `0` for unlimited. | | `HERMES_CRON_SCRIPT_TIMEOUT` | Timeout for pre-run scripts attached to cron jobs in seconds (default: `120`). Override for scripts that need longer execution (e.g., randomized delays for anti-bot timing). Also configurable via `cron.script_timeout_seconds` in `config.yaml`. 
| | `HERMES_CRON_MAX_PARALLEL` | Max cron jobs run in parallel per tick (default: `4`). | diff --git a/website/docs/user-guide/configuration.md b/website/docs/user-guide/configuration.md index ed94dfb0ed7..14f80d4d97a 100644 --- a/website/docs/user-guide/configuration.md +++ b/website/docs/user-guide/configuration.md @@ -1204,9 +1204,25 @@ display: runtime_footer: # Gateway: append a runtime-context footer to final replies enabled: false fields: ["model", "context_pct", "cwd"] + file_mutation_verifier: true # Append an advisory footer when write_file/patch calls failed this turn language: en # UI language for static messages (approval prompts, some gateway replies). en | zh | ja | de | es | fr | tr | uk ``` +### File-mutation verifier + +When `display.file_mutation_verifier` is `true` (default), Hermes appends a short advisory footer (a summary line plus one bullet per affected file) to the assistant's final response whenever a `write_file` or `patch` call failed during the turn and was never superseded by a successful write to the same path. This catches the "batch of parallel patches, half silently fail, model summarises success" class of over-claim without requiring you to manually run `git status` after every edit. + +Example footer: + +``` +⚠️ File-mutation verifier: 3 file(s) were NOT modified this turn despite any wording above that may suggest otherwise. Run `git status` or `read_file` to confirm. + • concepts/automatic-organization.md — [patch] Could not find match for old_string + • concepts/lora.md — [patch] Could not find match for old_string + • concepts/rag-pipeline.md — [patch] Could not find match for old_string +``` + +Set `file_mutation_verifier: false` (or `HERMES_FILE_MUTATION_VERIFIER=0`) to suppress the footer. The verifier only fires when real failures are outstanding at turn end — a model that retries a failed patch and succeeds within the same turn will not trigger it for that file. 
+ ### UI language for static messages The `display.language` setting translates a small set of static user-facing messages — the CLI approval prompt, a handful of gateway slash-command replies (e.g. restart-drain notices, "approval expired", "goal cleared"). It does **not** translate agent responses, log lines, tool output, error tracebacks, or slash-command descriptions — those stay in English. If you want the agent itself to reply in another language, just tell it in your prompt or system message.