feat(kanban): stamp handoff freshness so workers don't read stale state as current (#53973)

Multi-agent boards leak staleness: a sibling worker's parent handoff,
comment, or prior-attempt summary gets read by the next worker as live
truth even when it's a day old. build_worker_context surfaced the text
with (at best) a bare absolute timestamp, which an LLM reads as fact
regardless of age — parent results had no timestamp at all.

Adds a coarse relative-age stamp (just now / 18h ago / 3d ago) to every
recalled-state line and a one-line 'point-in-time snapshot, re-verify
against source' frame on the parent-results section, so the worker sees
when handoffs were produced and re-checks stale ones before acting.
This commit is contained in:
Teknium 2026-06-27 21:21:54 -07:00 committed by GitHub
parent 131c9c542c
commit 3b23a984b5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 130 additions and 4 deletions

View file

@ -286,6 +286,43 @@ _CTX_MAX_BODY_BYTES = 8 * 1024 # 8 KB per task.body (opening post)
_CTX_MAX_COMMENT_BYTES = 2 * 1024 # 2 KB per comment
def _relative_age(ts: Optional[int], now: Optional[int] = None) -> str:
"""Render the age of an epoch-seconds timestamp as a coarse, human-
readable string like ``just now``, ``18h ago``, ``3d ago``.
Workers read parent handoffs, comments, and prior-attempt summaries as
if they describe *current* state. A bare absolute timestamp
(``2026-06-25 14:30``) doesn't make an LLM reason about staleness — it
reads the content as fact regardless of how old it is. A relative age
("18h ago") is the signal that prompts the worker to re-verify against
the live source before acting on stale sibling work. Returns an empty
string for missing/invalid timestamps so callers can append
unconditionally.
"""
if ts is None:
return ""
try:
ts = int(ts)
except (TypeError, ValueError):
return ""
if now is None:
now = int(time.time())
delta = now - ts
if delta < 0:
# Clock skew across machines/profiles — don't claim "in the future".
return "just now"
if delta < 60:
return "just now"
if delta < 3600:
m = delta // 60
return f"{m}m ago"
if delta < 86400:
h = delta // 3600
return f"{h}h ago"
d = delta // 86400
return f"{d}d ago"
# ---------------------------------------------------------------------------
# Paths
# ---------------------------------------------------------------------------
@ -7843,6 +7880,11 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
if not task:
raise ValueError(f"unknown task {task_id}")
# Single clock reading shared by every relative-age stamp below, so all
# ages in one rendering are consistent ("3h ago" / "3h ago", not drifting
# by the seconds it takes to build the block).
_now = int(time.time())
def _cap(s: Optional[str], limit: int = _CTX_MAX_FIELD_BYTES) -> str:
"""Truncate a string to `limit` chars with a visible ellipsis."""
if not s:
@ -7922,9 +7964,11 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
for offset, run in enumerate(shown):
idx = first_shown_idx + offset
ts = time.strftime("%Y-%m-%d %H:%M", time.localtime(run.started_at))
age = _relative_age(run.started_at, _now)
ts_disp = f"{ts}, {age}" if age else ts
profile = run.profile or "(unknown)"
outcome = run.outcome or run.status
lines.append(f"### Attempt {idx}{outcome} ({profile}, {ts})")
lines.append(f"### Attempt {idx}{outcome} ({profile}, {ts_disp})")
if run.summary and run.summary.strip():
lines.append(_cap(run.summary))
if run.error and run.error.strip():
@ -7958,8 +8002,24 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
if not wrote_header:
lines.append("## Parent task results")
lines.append(
"_Handoffs from upstream tasks, captured when each parent "
"completed (see age below). These are point-in-time "
"snapshots, not live state — if a result drives your "
"current work and it's not recent, re-verify against the "
"source before acting on it as current._"
)
wrote_header = True
lines.append(f"### {pid}")
# When did this parent's result get produced? Prefer the
# completed run's end time; fall back to the task's completed_at.
done_ts = None
if run is not None and getattr(run, "ended_at", None):
done_ts = run.ended_at
elif pt.completed_at:
done_ts = pt.completed_at
age = _relative_age(done_ts, _now)
lines.append(f"### {pid}" + (f" (completed {age})" if age else ""))
body_lines: list[str] = []
if run is not None and run.summary and run.summary.strip():
@ -7999,9 +8059,11 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
ts = time.strftime(
"%Y-%m-%d %H:%M", time.localtime(int(row["ended_at"]))
)
age = _relative_age(row["ended_at"], _now)
ts_disp = f"{ts}, {age}" if age else ts
s = (row["summary"] or "").strip().splitlines()
first = s[0][:200] if s else "(no summary)"
lines.append(f"- {row['id']}{row['title']} ({ts}): {first}")
lines.append(f"- {row['id']}{row['title']} ({ts_disp}): {first}")
lines.append("")
# Comments: cap at the most-recent _CTX_MAX_COMMENTS so
@ -8023,6 +8085,8 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
)
for c in shown_c:
ts = time.strftime("%Y-%m-%d %H:%M", time.localtime(c.created_at))
age = _relative_age(c.created_at, _now)
ts_disp = f"{ts}, {age}" if age else ts
# Render author with explicit "comment from worker" framing so
# operator-controlled HERMES_PROFILE values like "hermes-system"
# or "operator" can't be misread by the next worker as a system
@ -8030,7 +8094,7 @@ def build_worker_context(conn: sqlite3.Connection, task_id: str) -> str:
# Defense-in-depth — the LLM-controlled author-forgery surface
# was already closed in #22435. See #22452.
safe_author = (c.author or "").replace("`", "")
lines.append(f"comment from worker `{safe_author}` at {ts}:")
lines.append(f"comment from worker `{safe_author}` at {ts_disp}:")
lines.append(_cap(c.body, _CTX_MAX_COMMENT_BYTES))
lines.append("")

View file

@ -1701,6 +1701,68 @@ def test_build_worker_context_uses_parent_run_summary(kanban_home):
conn.close()
def test_relative_age_renders_coarse_buckets():
    """The freshness helper maps epoch seconds onto coarse human ages and
    degrades to a safe value for future, missing, or unparseable input."""
    now = 1_000_000
    expectations = [
        # (timestamp handed to the helper, expected rendering)
        (now, "just now"),
        (now - 30, "just now"),
        (now - 5 * 60, "5m ago"),
        (now - 18 * 3600, "18h ago"),
        (now - 2 * 86400, "2d ago"),
        # Clock skew across machines/profiles must not claim "in the future".
        (now + 500, "just now"),
        # Missing timestamps render empty so callers can append
        # unconditionally.
        (None, ""),
        # Defensive: an unparseable value (e.g. a stray string) renders
        # empty rather than raising.
        ("garbage", ""),
    ]
    for stamp, rendered in expectations:
        assert kb._relative_age(stamp, now) == rendered
def test_build_worker_context_stamps_parent_freshness(kanban_home):
    """A parent handoff is rendered with its relative age and a
    'verify against source' frame.

    This covers the multi-agent staleness gap: an orchestrator and sibling
    workers leave reports/handoffs that the next worker would otherwise
    read as current truth. The age stamp is the cue to re-verify before
    acting on a day-old result.
    """
    conn = kb.connect()
    try:
        parent = kb.create_task(conn, title="research", assignee="researcher")
        child = kb.create_task(
            conn, title="write", assignee="writer", parents=[parent],
        )
        kb.claim_task(conn, parent)
        kb.complete_task(
            conn, parent,
            result="done",
            summary="meeting ingest workflow finished; pipeline ready",
        )

        # Push the parent's completion 18h into the past. Both the task row
        # and its completed run row are rewritten so the age is the same
        # whichever of the two build_worker_context consults.
        backdated = int(time.time()) - 18 * 3600
        backdate_statements = (
            "UPDATE tasks SET completed_at = ? WHERE id = ?",
            "UPDATE task_runs SET ended_at = ? WHERE task_id = ?",
        )
        with kb.write_txn(conn):
            for sql in backdate_statements:
                conn.execute(sql, (backdated, parent))

        rendered = kb.build_worker_context(conn, child)

        expected_fragments = (
            # The handoff text itself still appears...
            "meeting ingest workflow finished",
            # ...now stamped with its age...
            "completed 18h ago",
            # ...and framed as a point-in-time snapshot.
            "point-in-time snapshots, not live state",
        )
        for fragment in expected_fragments:
            assert fragment in rendered
    finally:
        conn.close()
def test_migration_backfills_inflight_run_for_legacy_db(kanban_home):
"""An existing 'running' task from before task_runs existed should
get a synthesized run row so subsequent operations (complete,