mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
Merge pull request #52285 from NousResearch/bb/verify-ledger
feat(agent): record coding verification evidence
This commit is contained in:
commit
da0320bf40
4 changed files with 933 additions and 7 deletions
|
|
@ -1266,8 +1266,43 @@ def _check_file_staleness(filepath: str, task_id: str) -> str | None:
|
|||
return None
|
||||
|
||||
|
||||
def _mark_verification_stale(
|
||||
task_id: str,
|
||||
resolved_paths: list[str],
|
||||
session_id: str | None = None,
|
||||
) -> None:
|
||||
"""Best-effort note that successful edits made prior verification stale."""
|
||||
paths = [p for p in resolved_paths if p]
|
||||
if not paths:
|
||||
return
|
||||
try:
|
||||
from agent.coding_context import project_facts_for
|
||||
from agent.verification_evidence import mark_workspace_edited
|
||||
|
||||
cwd = None
|
||||
for path in paths:
|
||||
try:
|
||||
candidate = str(Path(path).parent)
|
||||
except Exception:
|
||||
continue
|
||||
if project_facts_for(candidate):
|
||||
cwd = candidate
|
||||
break
|
||||
if cwd is None:
|
||||
cwd = _authoritative_workspace_root(task_id)
|
||||
if cwd is None:
|
||||
try:
|
||||
cwd = str(Path(paths[0]).parent)
|
||||
except Exception:
|
||||
cwd = None
|
||||
mark_workspace_edited(session_id=session_id or task_id, cwd=cwd, paths=paths)
|
||||
except Exception:
|
||||
logger.debug("verification stale marker failed", exc_info=True)
|
||||
|
||||
|
||||
def write_file_tool(path: str, content: str, task_id: str = "default",
|
||||
cross_profile: bool = False) -> str:
|
||||
cross_profile: bool = False,
|
||||
session_id: str | None = None) -> str:
|
||||
"""Write content to a file.
|
||||
|
||||
``cross_profile`` opts out of the soft cross-Hermes-profile guard. The
|
||||
|
|
@ -1305,6 +1340,8 @@ def write_file_tool(path: str, content: str, task_id: str = "default",
|
|||
result_dict = result.to_dict()
|
||||
if stale_warning:
|
||||
result_dict["_warning"] = stale_warning
|
||||
if not result_dict.get("error"):
|
||||
_mark_verification_stale(task_id, [path], session_id=session_id)
|
||||
_update_read_timestamp(path, task_id)
|
||||
return json.dumps(result_dict, ensure_ascii=False)
|
||||
|
||||
|
|
@ -1331,6 +1368,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default",
|
|||
result_dict["resolved_path"] = _resolved
|
||||
if not result_dict.get("error"):
|
||||
result_dict["files_modified"] = [_resolved]
|
||||
_mark_verification_stale(task_id, [_resolved], session_id=session_id)
|
||||
# Refresh stamps after the successful write so consecutive
|
||||
# writes by this task don't trigger false staleness warnings.
|
||||
_update_read_timestamp(path, task_id)
|
||||
|
|
@ -1347,7 +1385,8 @@ def write_file_tool(path: str, content: str, task_id: str = "default",
|
|||
|
||||
def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
||||
new_string: str = None, replace_all: bool = False, patch: str = None,
|
||||
task_id: str = "default", cross_profile: bool = False) -> str:
|
||||
task_id: str = "default", cross_profile: bool = False,
|
||||
session_id: str | None = None) -> str:
|
||||
"""Patch a file using replace mode or V4A patch format.
|
||||
|
||||
``cross_profile`` opts out of the soft cross-Hermes-profile guard for
|
||||
|
|
@ -1465,6 +1504,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
|
|||
result_dict["files_modified"] = _resolved_modified
|
||||
if len(_resolved_modified) == 1:
|
||||
result_dict["resolved_path"] = _resolved_modified[0]
|
||||
_mark_verification_stale(task_id, _resolved_modified, session_id=session_id)
|
||||
for _p in _paths_to_check:
|
||||
_update_read_timestamp(_p, task_id)
|
||||
_r = _path_to_resolved.get(_p)
|
||||
|
|
@ -1730,6 +1770,7 @@ def _handle_write_file(args, **kw):
|
|||
return write_file_tool(
|
||||
path=args["path"], content=args["content"], task_id=tid,
|
||||
cross_profile=bool(args.get("cross_profile", False)),
|
||||
session_id=kw.get("session_id"),
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -1740,6 +1781,7 @@ def _handle_patch(args, **kw):
|
|||
old_string=args.get("old_string"), new_string=args.get("new_string"),
|
||||
replace_all=args.get("replace_all", False), patch=args.get("patch"), task_id=tid,
|
||||
cross_profile=bool(args.get("cross_profile", False)),
|
||||
session_id=kw.get("session_id"),
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1872,6 +1872,7 @@ def terminal_tool(
|
|||
background: bool = False,
|
||||
timeout: Optional[int] = None,
|
||||
task_id: Optional[str] = None,
|
||||
session_id: Optional[str] = None,
|
||||
force: bool = False,
|
||||
workdir: Optional[str] = None,
|
||||
pty: bool = False,
|
||||
|
|
@ -1886,6 +1887,7 @@ def terminal_tool(
|
|||
background: Whether to run in background (default: False)
|
||||
timeout: Command timeout in seconds (default: from config)
|
||||
task_id: Unique identifier for environment isolation (optional)
|
||||
session_id: Conversation/session identifier for durable observability
|
||||
force: If True, skip dangerous command check (use after user confirms)
|
||||
workdir: Working directory for this command (optional, uses session cwd if not set)
|
||||
pty: If True, use pseudo-terminal for interactive CLI tools (local backend only)
|
||||
|
|
@ -2441,16 +2443,18 @@ def terminal_tool(
|
|||
max_retries = 3
|
||||
retry_count = 0
|
||||
result = None
|
||||
command_cwd = None
|
||||
|
||||
while retry_count <= max_retries:
|
||||
try:
|
||||
command_cwd = _resolve_command_cwd(
|
||||
workdir=workdir,
|
||||
env=env,
|
||||
default_cwd=cwd,
|
||||
)
|
||||
execute_kwargs = {
|
||||
"timeout": effective_timeout,
|
||||
"cwd": _resolve_command_cwd(
|
||||
workdir=workdir,
|
||||
env=env,
|
||||
default_cwd=cwd,
|
||||
),
|
||||
"cwd": command_cwd,
|
||||
}
|
||||
result = env.execute(command, **execute_kwargs)
|
||||
except Exception as e:
|
||||
|
|
@ -2541,6 +2545,25 @@ def terminal_tool(
|
|||
"exit_code": returncode,
|
||||
"error": None,
|
||||
}
|
||||
try:
|
||||
from agent.verification_evidence import record_terminal_result
|
||||
|
||||
evidence = record_terminal_result(
|
||||
command=command,
|
||||
cwd=command_cwd,
|
||||
session_id=session_id or task_id or effective_task_id or "default",
|
||||
exit_code=returncode,
|
||||
output=output,
|
||||
)
|
||||
if evidence:
|
||||
result_dict["verification_evidence"] = {
|
||||
"status": evidence.get("status"),
|
||||
"kind": evidence.get("kind"),
|
||||
"scope": evidence.get("scope"),
|
||||
"canonical_command": evidence.get("canonical_command"),
|
||||
}
|
||||
except Exception:
|
||||
logger.debug("verification evidence recording failed", exc_info=True)
|
||||
if approval_note:
|
||||
result_dict["approval"] = approval_note
|
||||
if exit_note:
|
||||
|
|
@ -2774,6 +2797,7 @@ def _handle_terminal(args, **kw):
|
|||
background=args.get("background", False),
|
||||
timeout=args.get("timeout"),
|
||||
task_id=kw.get("task_id"),
|
||||
session_id=kw.get("session_id"),
|
||||
workdir=args.get("workdir"),
|
||||
pty=args.get("pty", False),
|
||||
notify_on_complete=args.get("notify_on_complete", False),
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue