mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-07-01 12:02:05 +00:00
feat(kanban): stamp handoff freshness so workers don't read stale state as current (#53973)
Multi-agent boards leak staleness: a sibling worker's parent handoff, comment, or prior-attempt summary gets read by the next worker as live truth even when it's a day old. build_worker_context surfaced the text with (at best) a bare absolute timestamp, which an LLM reads as fact regardless of age — parent results had no timestamp at all. Adds a coarse relative-age stamp (just now / 18h ago / 3d ago) to every recalled-state line and a one-line 'point-in-time snapshot, re-verify against source' frame on the parent-results section, so the worker sees when handoffs were produced and re-checks stale ones before acting.
This commit is contained in:
parent
131c9c542c
commit
3b23a984b5
2 changed files with 130 additions and 4 deletions
|
|
@ -286,6 +286,43 @@ _CTX_MAX_BODY_BYTES = 8 * 1024 # 8 KB per task.body (opening post)
|
|||
_CTX_MAX_COMMENT_BYTES = 2 * 1024 # 2 KB per comment
|
||||
|
||||
|
||||
def _relative_age(ts: Optional[int], now: Optional[int] = None) -> str:
|
||||
"""Render the age of an epoch-seconds timestamp as a coarse, human-
|
||||
readable string like ``just now``, ``18h ago``, ``3d ago``.
|
||||
|
||||
Workers read parent handoffs, comments, and prior-attempt summaries as
|
||||
if they describe *current* state. A bare absolute timestamp
|
||||
(``2026-06-25 14:30``) doesn't make an LLM reason about staleness — it
|
||||
reads the content as fact regardless of how old it is. A relative age
|
||||
("18h ago") is the signal that prompts the worker to re-verify against
|
||||
the live source before acting on stale sibling work. Returns an empty
|
||||
string for missing/invalid timestamps so callers can append
|
||||
unconditionally.
|
||||
"""
|
||||
if ts is None:
|
||||
return ""
|
||||
try:
|
||||
ts = int(ts)
|
||||
except (TypeError, ValueError):
|
||||
return ""
|
||||
if now is None:
|
||||
now = int(time.time())
|
||||
delta = now - ts
|
||||
if delta < 0:
|
||||
# Clock skew across machines/profiles — don't claim "in the future".
|
||||
return "just now"
|
||||
if delta < 60:
|
||||
return "just now"
|
||||
if delta < 3600:
|
||||
m = delta // 60
|
||||
return f"{m}m ago"
|
||||
if delta < 86400:
|
||||
h = delta // 3600
|
||||
return f"{h}h ago"
|
||||
d = delta // 86400
|
||||
return f"{d}d ago"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Paths
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -7843,6 +7880,11 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
|
|||
if not task:
|
||||
raise ValueError(f"unknown task {task_id}")
|
||||
|
||||
# Single clock reading shared by every relative-age stamp below, so all
|
||||
# ages in one rendering are consistent ("3h ago" / "3h ago", not drifting
|
||||
# by the seconds it takes to build the block).
|
||||
_now = int(time.time())
|
||||
|
||||
def _cap(s: Optional[str], limit: int = _CTX_MAX_FIELD_BYTES) -> str:
|
||||
"""Truncate a string to `limit` chars with a visible ellipsis."""
|
||||
if not s:
|
||||
|
|
@ -7922,9 +7964,11 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
|
|||
for offset, run in enumerate(shown):
|
||||
idx = first_shown_idx + offset
|
||||
ts = time.strftime("%Y-%m-%d %H:%M", time.localtime(run.started_at))
|
||||
age = _relative_age(run.started_at, _now)
|
||||
ts_disp = f"{ts}, {age}" if age else ts
|
||||
profile = run.profile or "(unknown)"
|
||||
outcome = run.outcome or run.status
|
||||
lines.append(f"### Attempt {idx} — {outcome} ({profile}, {ts})")
|
||||
lines.append(f"### Attempt {idx} — {outcome} ({profile}, {ts_disp})")
|
||||
if run.summary and run.summary.strip():
|
||||
lines.append(_cap(run.summary))
|
||||
if run.error and run.error.strip():
|
||||
|
|
@ -7958,8 +8002,24 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
|
|||
|
||||
if not wrote_header:
|
||||
lines.append("## Parent task results")
|
||||
lines.append(
|
||||
"_Handoffs from upstream tasks, captured when each parent "
|
||||
"completed (see age below). These are point-in-time "
|
||||
"snapshots, not live state — if a result drives your "
|
||||
"current work and it's not recent, re-verify against the "
|
||||
"source before acting on it as current._"
|
||||
)
|
||||
wrote_header = True
|
||||
lines.append(f"### {pid}")
|
||||
|
||||
# When did this parent's result get produced? Prefer the
|
||||
# completed run's end time; fall back to the task's completed_at.
|
||||
done_ts = None
|
||||
if run is not None and getattr(run, "ended_at", None):
|
||||
done_ts = run.ended_at
|
||||
elif pt.completed_at:
|
||||
done_ts = pt.completed_at
|
||||
age = _relative_age(done_ts, _now)
|
||||
lines.append(f"### {pid}" + (f" (completed {age})" if age else ""))
|
||||
|
||||
body_lines: list[str] = []
|
||||
if run is not None and run.summary and run.summary.strip():
|
||||
|
|
@ -7999,9 +8059,11 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
|
|||
ts = time.strftime(
|
||||
"%Y-%m-%d %H:%M", time.localtime(int(row["ended_at"]))
|
||||
)
|
||||
age = _relative_age(row["ended_at"], _now)
|
||||
ts_disp = f"{ts}, {age}" if age else ts
|
||||
s = (row["summary"] or "").strip().splitlines()
|
||||
first = s[0][:200] if s else "(no summary)"
|
||||
lines.append(f"- {row['id']} — {row['title']} ({ts}): {first}")
|
||||
lines.append(f"- {row['id']} — {row['title']} ({ts_disp}): {first}")
|
||||
lines.append("")
|
||||
|
||||
# Comments: cap at the most-recent _CTX_MAX_COMMENTS so
|
||||
|
|
@ -8023,6 +8085,8 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
|
|||
)
|
||||
for c in shown_c:
|
||||
ts = time.strftime("%Y-%m-%d %H:%M", time.localtime(c.created_at))
|
||||
age = _relative_age(c.created_at, _now)
|
||||
ts_disp = f"{ts}, {age}" if age else ts
|
||||
# Render author with explicit "comment from worker" framing so
|
||||
# operator-controlled HERMES_PROFILE values like "hermes-system"
|
||||
# or "operator" can't be misread by the next worker as a system
|
||||
|
|
@ -8030,7 +8094,7 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
|
|||
# Defense-in-depth — the LLM-controlled author-forgery surface
|
||||
# was already closed in #22435. See #22452.
|
||||
safe_author = (c.author or "").replace("`", "")
|
||||
lines.append(f"comment from worker `{safe_author}` at {ts}:")
|
||||
lines.append(f"comment from worker `{safe_author}` at {ts_disp}:")
|
||||
lines.append(_cap(c.body, _CTX_MAX_COMMENT_BYTES))
|
||||
lines.append("")
|
||||
|
||||
|
|
|
|||
|
|
@ -1701,6 +1701,68 @@ def test_build_worker_context_uses_parent_run_summary(kanban_home):
|
|||
conn.close()
|
||||
|
||||
|
||||
def test_relative_age_renders_coarse_buckets():
    """Freshness helper turns epoch seconds into coarse human ages, and
    degrades safely on missing / future timestamps."""
    now = 1_000_000
    assert kb._relative_age(now, now) == "just now"
    assert kb._relative_age(now - 30, now) == "just now"
    assert kb._relative_age(now - 5 * 60, now) == "5m ago"
    assert kb._relative_age(now - 18 * 3600, now) == "18h ago"
    assert kb._relative_age(now - 2 * 86400, now) == "2d ago"
    # Bucket boundaries: pin the exact second each unit takes over so an
    # off-by-one in the thresholds cannot slip through the mid-bucket
    # samples above.
    assert kb._relative_age(now - 59, now) == "just now"
    assert kb._relative_age(now - 60, now) == "1m ago"
    assert kb._relative_age(now - 3599, now) == "59m ago"
    assert kb._relative_age(now - 3600, now) == "1h ago"
    assert kb._relative_age(now - 86399, now) == "23h ago"
    assert kb._relative_age(now - 86400, now) == "1d ago"
    # Clock skew across machines/profiles must not claim "in the future".
    assert kb._relative_age(now + 500, now) == "just now"
    # Missing / unparseable timestamps render empty so callers can append
    # unconditionally.
    assert kb._relative_age(None, now) == ""
    # Defensive: an unparseable value (e.g. a stray string) renders empty
    # rather than raising.
    assert kb._relative_age("garbage", now) == ""  # type: ignore[arg-type]
|
||||
|
||||
|
||||
def test_build_worker_context_stamps_parent_freshness(kanban_home):
    """A child's worker context shows how old each parent handoff is and
    frames it as a snapshot to be re-verified.

    Covers the multi-agent staleness gap: reports left by an orchestrator
    or sibling workers are otherwise read by the next worker as current
    truth. The relative-age stamp is what prompts re-verification.
    """
    # Both rows are backdated: the task row and its completed run row,
    # which is where build_worker_context reads the handoff timestamp from.
    backdate_statements = (
        "UPDATE tasks SET completed_at = ? WHERE id = ?",
        "UPDATE task_runs SET ended_at = ? WHERE task_id = ?",
    )
    conn = kb.connect()
    try:
        parent_id = kb.create_task(
            conn, title="research", assignee="researcher",
        )
        child_id = kb.create_task(
            conn, title="write", assignee="writer", parents=[parent_id],
        )
        kb.claim_task(conn, parent_id)
        kb.complete_task(
            conn,
            parent_id,
            result="done",
            summary="meeting ingest workflow finished; pipeline ready",
        )

        # Pretend the parent finished 18 hours ago.
        eighteen_hours_ago = int(time.time()) - 18 * 3600
        with kb.write_txn(conn):
            for sql in backdate_statements:
                conn.execute(sql, (eighteen_hours_ago, parent_id))

        rendered = kb.build_worker_context(conn, child_id)

        # The handoff text itself is still surfaced...
        assert "meeting ingest workflow finished" in rendered
        # ...and now carries its age plus the point-in-time framing line.
        assert "completed 18h ago" in rendered
        assert "point-in-time snapshots, not live state" in rendered
    finally:
        conn.close()
|
||||
|
||||
|
||||
def test_migration_backfills_inflight_run_for_legacy_db(kanban_home):
|
||||
"""An existing 'running' task from before task_runs existed should
|
||||
get a synthesized run row so subsequent operations (complete,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue