mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
feat(delegate): cross-agent file state coordination for concurrent subagents (#13718)
* feat(models): hide OpenRouter models that don't advertise tool support Port from Kilo-Org/kilocode#9068. hermes-agent is tool-calling-first — every provider path assumes the model can invoke tools. Models whose OpenRouter supported_parameters doesn't include 'tools' (e.g. image-only or completion-only models) cannot be driven by the agent loop and fail at the first tool call. Filter them out of fetch_openrouter_models() so they never appear in the model picker (`hermes model`, setup wizard, /model slash command). Permissive when the field is missing — OpenRouter-compatible gateways (Nous Portal, private mirrors, older snapshots) don't always populate supported_parameters. Treat missing as 'unknown → allow' rather than silently emptying the picker on those gateways. Only hide models whose supported_parameters is an explicit list that omits tools. Tests cover: tools present → kept, tools absent → dropped, field missing → kept, malformed non-list → kept, non-dict item → kept, empty list → dropped. * feat(delegate): cross-agent file state coordination for concurrent subagents Prevents mangled edits when concurrent subagents touch the same file (same process, same filesystem — the mangle scenario from #11215). Three layers, all opt-out via HERMES_DISABLE_FILE_STATE_GUARD=1: 1. FileStateRegistry (tools/file_state.py) — process-wide singleton tracking per-agent read stamps and the last writer globally. check_stale() names the sibling subagent in the warning when a non-owning agent wrote after this agent's last read. 2. Per-path threading.Lock wrapped around the read-modify-write region in write_file_tool and patch_tool. Concurrent siblings on the same path serialize; different paths stay fully parallel. V4A multi-file patches lock in sorted path order (deadlock-free). 3. Delegate-completion reminder in tools/delegate_tool.py: after a subagent returns, writes_since(parent, child_start, parent_reads) appends '[NOTE: subagent modified files the parent previously read — re-read before editing: ...]' to entry.summary when the child touched anything the parent had already seen. Complements (does not replace) the existing path-overlap check in run_agent._should_parallelize_tool_batch — batch check prevents same-file parallel dispatch within one agent's turn (cheap prevention, zero API cost), registry catches cross-subagent and cross-turn staleness at write time (detection). Behavior is warning-only, not hard-failing — matches existing project style. Errors surface naturally: sibling writes often invalidate the old_string in patch operations, which already errors cleanly. Tests: tests/tools/test_file_state_registry.py — 16 tests covering registry state transitions, per-path locking, per-path-not-global locking, writes_since filtering, kill switch, and end-to-end integration through the real read_file/write_file/patch handlers.
This commit is contained in:
parent
35a4b093d8
commit
9c9d9b7ddf
6 changed files with 785 additions and 39 deletions
|
|
@ -12,6 +12,7 @@ from typing import Optional
|
|||
from agent.file_safety import get_read_block_error
|
||||
from tools.binary_extensions import has_binary_extension
|
||||
from tools.file_operations import ShellFileOperations
|
||||
from tools import file_state
|
||||
from agent.redact import redact_sensitive_text
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
|
@ -483,6 +484,19 @@ def read_file_tool(path: str, offset: int = 1, limit: int = 500, task_id: str =
|
|||
# accumulate megabytes of dict/set state. See _cap_read_tracker_data.
|
||||
_cap_read_tracker_data(task_data)
|
||||
|
||||
# Cross-agent file-state registry (separate from per-task read
|
||||
# tracker above): records that THIS agent has read this path so
|
||||
# write/patch can detect sibling-subagent writes that happened
|
||||
# after our read. Partial read when offset>1 or the read was
|
||||
# truncated (large file with more content than limit covered).
|
||||
# Outside the _read_tracker_lock so the registry's own locking
|
||||
# isn't nested under ours.
|
||||
try:
|
||||
_partial = (offset > 1) or bool(result_dict.get("truncated"))
|
||||
file_state.record_read(task_id, resolved_str, partial=_partial)
|
||||
except Exception:
|
||||
logger.debug("file_state.record_read failed", exc_info=True)
|
||||
|
||||
if count >= 4:
|
||||
# Hard block: stop returning content to break the loop
|
||||
return json.dumps({
|
||||
|
|
@ -602,15 +616,43 @@ def write_file_tool(path: str, content: str, task_id: str = "default") -> str:
|
|||
if sensitive_err:
|
||||
return tool_error(sensitive_err)
|
||||
try:
|
||||
stale_warning = _check_file_staleness(path, task_id)
|
||||
file_ops = _get_file_ops(task_id)
|
||||
result = file_ops.write_file(path, content)
|
||||
result_dict = result.to_dict()
|
||||
if stale_warning:
|
||||
result_dict["_warning"] = stale_warning
|
||||
# Refresh the stored timestamp so consecutive writes by this
|
||||
# task don't trigger false staleness warnings.
|
||||
_update_read_timestamp(path, task_id)
|
||||
# Resolve once for the registry lock + stale check. Failures here
|
||||
# fall back to the legacy path — write proceeds, per-task staleness
|
||||
# check below still runs.
|
||||
try:
|
||||
_resolved = str(_resolve_path(path))
|
||||
except Exception:
|
||||
_resolved = None
|
||||
|
||||
if _resolved is None:
|
||||
stale_warning = _check_file_staleness(path, task_id)
|
||||
file_ops = _get_file_ops(task_id)
|
||||
result = file_ops.write_file(path, content)
|
||||
result_dict = result.to_dict()
|
||||
if stale_warning:
|
||||
result_dict["_warning"] = stale_warning
|
||||
_update_read_timestamp(path, task_id)
|
||||
return json.dumps(result_dict, ensure_ascii=False)
|
||||
|
||||
# Serialize the read→modify→write region per-path so concurrent
|
||||
# subagents can't interleave on the same file. Different paths
|
||||
# remain fully parallel.
|
||||
with file_state.lock_path(_resolved):
|
||||
# Cross-agent staleness wins over per-task warning when both
|
||||
# fire — its message names the sibling subagent.
|
||||
cross_warning = file_state.check_stale(task_id, _resolved)
|
||||
stale_warning = _check_file_staleness(path, task_id)
|
||||
file_ops = _get_file_ops(task_id)
|
||||
result = file_ops.write_file(path, content)
|
||||
result_dict = result.to_dict()
|
||||
effective_warning = cross_warning or stale_warning
|
||||
if effective_warning:
|
||||
result_dict["_warning"] = effective_warning
|
||||
# Refresh stamps after the successful write so consecutive
|
||||
# writes by this task don't trigger false staleness warnings.
|
||||
_update_read_timestamp(path, task_id)
|
||||
if not result_dict.get("error"):
|
||||
file_state.note_write(task_id, _resolved)
|
||||
return json.dumps(result_dict, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
if _is_expected_write_exception(e):
|
||||
|
|
@ -637,36 +679,70 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
|||
if sensitive_err:
|
||||
return tool_error(sensitive_err)
|
||||
try:
|
||||
# Check staleness for all files this patch will touch.
|
||||
stale_warnings = []
|
||||
# Resolve paths for locking. Ordered + deduplicated so concurrent
|
||||
# callers lock in the same order — prevents deadlock on overlapping
|
||||
# multi-file V4A patches.
|
||||
_resolved_paths: list[str] = []
|
||||
_seen: set[str] = set()
|
||||
for _p in _paths_to_check:
|
||||
_sw = _check_file_staleness(_p, task_id)
|
||||
if _sw:
|
||||
stale_warnings.append(_sw)
|
||||
try:
|
||||
_r = str(_resolve_path(_p))
|
||||
except Exception:
|
||||
_r = None
|
||||
if _r and _r not in _seen:
|
||||
_resolved_paths.append(_r)
|
||||
_seen.add(_r)
|
||||
_resolved_paths.sort()
|
||||
|
||||
file_ops = _get_file_ops(task_id)
|
||||
|
||||
if mode == "replace":
|
||||
if not path:
|
||||
return tool_error("path required")
|
||||
if old_string is None or new_string is None:
|
||||
return tool_error("old_string and new_string required")
|
||||
result = file_ops.patch_replace(path, old_string, new_string, replace_all)
|
||||
elif mode == "patch":
|
||||
if not patch:
|
||||
return tool_error("patch content required")
|
||||
result = file_ops.patch_v4a(patch)
|
||||
else:
|
||||
return tool_error(f"Unknown mode: {mode}")
|
||||
|
||||
result_dict = result.to_dict()
|
||||
if stale_warnings:
|
||||
result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings)
|
||||
# Refresh stored timestamps for all successfully-patched paths so
|
||||
# consecutive edits by this task don't trigger false warnings.
|
||||
if not result_dict.get("error"):
|
||||
# Acquire per-path locks in sorted order via ExitStack. On single
|
||||
# path this degenerates to one lock; on empty list (unresolvable)
|
||||
# it's a no-op and execution falls through unchanged.
|
||||
from contextlib import ExitStack
|
||||
with ExitStack() as _locks:
|
||||
for _r in _resolved_paths:
|
||||
_locks.enter_context(file_state.lock_path(_r))
|
||||
|
||||
# Collect warnings — cross-agent registry first (names sibling),
|
||||
# then per-task tracker as a fallback.
|
||||
stale_warnings: list[str] = []
|
||||
_path_to_resolved: dict[str, str] = {}
|
||||
for _p in _paths_to_check:
|
||||
_update_read_timestamp(_p, task_id)
|
||||
try:
|
||||
_r = str(_resolve_path(_p))
|
||||
except Exception:
|
||||
_r = None
|
||||
_path_to_resolved[_p] = _r
|
||||
_cross = file_state.check_stale(task_id, _r) if _r else None
|
||||
_sw = _cross or _check_file_staleness(_p, task_id)
|
||||
if _sw:
|
||||
stale_warnings.append(_sw)
|
||||
|
||||
file_ops = _get_file_ops(task_id)
|
||||
|
||||
if mode == "replace":
|
||||
if not path:
|
||||
return tool_error("path required")
|
||||
if old_string is None or new_string is None:
|
||||
return tool_error("old_string and new_string required")
|
||||
result = file_ops.patch_replace(path, old_string, new_string, replace_all)
|
||||
elif mode == "patch":
|
||||
if not patch:
|
||||
return tool_error("patch content required")
|
||||
result = file_ops.patch_v4a(patch)
|
||||
else:
|
||||
return tool_error(f"Unknown mode: {mode}")
|
||||
|
||||
result_dict = result.to_dict()
|
||||
if stale_warnings:
|
||||
result_dict["_warning"] = stale_warnings[0] if len(stale_warnings) == 1 else " | ".join(stale_warnings)
|
||||
# Refresh stored timestamps for all successfully-patched paths so
|
||||
# consecutive edits by this task don't trigger false warnings.
|
||||
if not result_dict.get("error"):
|
||||
for _p in _paths_to_check:
|
||||
_update_read_timestamp(_p, task_id)
|
||||
_r = _path_to_resolved.get(_p)
|
||||
if _r:
|
||||
file_state.note_write(task_id, _r)
|
||||
result_json = json.dumps(result_dict, ensure_ascii=False)
|
||||
# Hint when old_string not found — saves iterations where the agent
|
||||
# retries with stale content instead of re-reading the file.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue