Merge pull request #52285 from NousResearch/bb/verify-ledger

feat(agent): record coding verification evidence
This commit is contained in:
brooklyn! 2026-06-24 23:07:10 -05:00 committed by GitHub
commit da0320bf40
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 933 additions and 7 deletions

View file

@ -1266,8 +1266,43 @@ def _check_file_staleness(filepath: str, task_id: str) -> str | None:
return None
def _mark_verification_stale(
task_id: str,
resolved_paths: list[str],
session_id: str | None = None,
) -> None:
"""Best-effort note that successful edits made prior verification stale."""
paths = [p for p in resolved_paths if p]
if not paths:
return
try:
from agent.coding_context import project_facts_for
from agent.verification_evidence import mark_workspace_edited
cwd = None
for path in paths:
try:
candidate = str(Path(path).parent)
except Exception:
continue
if project_facts_for(candidate):
cwd = candidate
break
if cwd is None:
cwd = _authoritative_workspace_root(task_id)
if cwd is None:
try:
cwd = str(Path(paths[0]).parent)
except Exception:
cwd = None
mark_workspace_edited(session_id=session_id or task_id, cwd=cwd, paths=paths)
except Exception:
logger.debug("verification stale marker failed", exc_info=True)
def write_file_tool(path: str, content: str, task_id: str = "default",
cross_profile: bool = False) -> str:
cross_profile: bool = False,
session_id: str | None = None) -> str:
"""Write content to a file.
``cross_profile`` opts out of the soft cross-Hermes-profile guard. The
@ -1305,6 +1340,8 @@ def write_file_tool(path: str, content: str, task_id: str = "default",
result_dict = result.to_dict()
if stale_warning:
result_dict["_warning"] = stale_warning
if not result_dict.get("error"):
_mark_verification_stale(task_id, [path], session_id=session_id)
_update_read_timestamp(path, task_id)
return json.dumps(result_dict, ensure_ascii=False)
@ -1331,6 +1368,7 @@ def write_file_tool(path: str, content: str, task_id: str = "default",
result_dict["resolved_path"] = _resolved
if not result_dict.get("error"):
result_dict["files_modified"] = [_resolved]
_mark_verification_stale(task_id, [_resolved], session_id=session_id)
# Refresh stamps after the successful write so consecutive
# writes by this task don't trigger false staleness warnings.
_update_read_timestamp(path, task_id)
@ -1347,7 +1385,8 @@ def write_file_tool(path: str, content: str, task_id: str = "default",
def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
new_string: str = None, replace_all: bool = False, patch: str = None,
task_id: str = "default", cross_profile: bool = False) -> str:
task_id: str = "default", cross_profile: bool = False,
session_id: str | None = None) -> str:
"""Patch a file using replace mode or V4A patch format.
``cross_profile`` opts out of the soft cross-Hermes-profile guard for
@ -1465,6 +1504,7 @@ def patch_tool(mode: str = "replace", path: str = None, old_string: str = None,
result_dict["files_modified"] = _resolved_modified
if len(_resolved_modified) == 1:
result_dict["resolved_path"] = _resolved_modified[0]
_mark_verification_stale(task_id, _resolved_modified, session_id=session_id)
for _p in _paths_to_check:
_update_read_timestamp(_p, task_id)
_r = _path_to_resolved.get(_p)
@ -1730,6 +1770,7 @@ def _handle_write_file(args, **kw):
return write_file_tool(
path=args["path"], content=args["content"], task_id=tid,
cross_profile=bool(args.get("cross_profile", False)),
session_id=kw.get("session_id"),
)
@ -1740,6 +1781,7 @@ def _handle_patch(args, **kw):
old_string=args.get("old_string"), new_string=args.get("new_string"),
replace_all=args.get("replace_all", False), patch=args.get("patch"), task_id=tid,
cross_profile=bool(args.get("cross_profile", False)),
session_id=kw.get("session_id"),
)

View file

@ -1872,6 +1872,7 @@ def terminal_tool(
background: bool = False,
timeout: Optional[int] = None,
task_id: Optional[str] = None,
session_id: Optional[str] = None,
force: bool = False,
workdir: Optional[str] = None,
pty: bool = False,
@ -1886,6 +1887,7 @@ def terminal_tool(
background: Whether to run in background (default: False)
timeout: Command timeout in seconds (default: from config)
task_id: Unique identifier for environment isolation (optional)
session_id: Conversation/session identifier for durable observability
force: If True, skip dangerous command check (use after user confirms)
workdir: Working directory for this command (optional, uses session cwd if not set)
pty: If True, use pseudo-terminal for interactive CLI tools (local backend only)
@ -2441,16 +2443,18 @@ def terminal_tool(
max_retries = 3
retry_count = 0
result = None
command_cwd = None
while retry_count <= max_retries:
try:
command_cwd = _resolve_command_cwd(
workdir=workdir,
env=env,
default_cwd=cwd,
)
execute_kwargs = {
"timeout": effective_timeout,
"cwd": _resolve_command_cwd(
workdir=workdir,
env=env,
default_cwd=cwd,
),
"cwd": command_cwd,
}
result = env.execute(command, **execute_kwargs)
except Exception as e:
@ -2541,6 +2545,25 @@ def terminal_tool(
"exit_code": returncode,
"error": None,
}
try:
from agent.verification_evidence import record_terminal_result
evidence = record_terminal_result(
command=command,
cwd=command_cwd,
session_id=session_id or task_id or effective_task_id or "default",
exit_code=returncode,
output=output,
)
if evidence:
result_dict["verification_evidence"] = {
"status": evidence.get("status"),
"kind": evidence.get("kind"),
"scope": evidence.get("scope"),
"canonical_command": evidence.get("canonical_command"),
}
except Exception:
logger.debug("verification evidence recording failed", exc_info=True)
if approval_note:
result_dict["approval"] = approval_note
if exit_note:
@ -2774,6 +2797,7 @@ def _handle_terminal(args, **kw):
background=args.get("background", False),
timeout=args.get("timeout"),
task_id=kw.get("task_id"),
session_id=kw.get("session_id"),
workdir=args.get("workdir"),
pty=args.get("pty", False),
notify_on_complete=args.get("notify_on_complete", False),