mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-17 04:31:55 +00:00
fix(gateway): use transcript timestamp for auto-continue freshness
Follow-up to PR #16802 (BeliefanX). The original fix read `agent_history[-1].get("timestamp")` for the tool-tail freshness gate, but `gateway/run.py` strips the `timestamp` field off all tool/tool_call rows when building `agent_history` from the raw transcript (see `clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}`). At runtime the tool-tail branch always saw `None` and silently took the legacy-fresh path — the stale-guard never fired for the tool-tail case it was supposed to cover.

Changes:
- Read the freshness signal from the RAW `history` list (via new `_last_transcript_timestamp()` helper) BEFORE the strip. Both the resume_pending branch and the tool-tail branch use this single signal, replacing the two divergent ones.
- Default window bumped 15 min → 1 hour via new `_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT`. The 15-minute default was shorter than the default `gateway_timeout` of 30 min, so a legitimate long-running turn interrupted near its timeout boundary and resumed shortly after would have been misclassified as stale.
- Configurable via `config.yaml` `agent.gateway_auto_continue_freshness` (bridged to `HERMES_AUTO_CONTINUE_FRESHNESS` at gateway startup — same pattern as `gateway_timeout`). Set to 0 to disable the gate.
- `_coerce_gateway_timestamp` now explicitly rejects bool (which is a subclass of int and would otherwise coerce to 0.0/1.0).
- Tests rewritten to exercise the real production data shape: raw `history` → `_build_agent_history` strip → freshness decision. A regression guard (`test_stale_tool_tail_with_production_data_shape`) asserts `agent_history` tool rows carry NO timestamp, protecting against someone "fixing" the original bug by re-adding the stripped field (which would break the OpenAI tool-result message contract).

Add BeliefanX to scripts/release.py AUTHOR_MAP.
E2E verified: config.yaml → env var bridge → helper returns configured value; default 1h window; malformed/empty env var falls back to default; ISO-Z timestamps parse; ms-epoch coerced; bool rejected.
This commit is contained in:
parent
93feffbcfa
commit
7444e49d4e
4 changed files with 407 additions and 35 deletions
121
gateway/run.py
121
gateway/run.py
|
|
@ -43,7 +43,22 @@ _AGENT_CACHE_IDLE_TTL_SECS = 3600.0 # evict agents idle for >1h
|
|||
# Only auto-continue interrupted gateway turns while the interruption is fresh.
|
||||
# Stale tool-tail/resume markers can otherwise revive an unrelated old task
|
||||
# after a gateway restart when the user's next message starts new work.
|
||||
_AUTO_CONTINUE_FRESHNESS_SECS = 15 * 60
|
||||
#
|
||||
# The freshness signal is the timestamp of the last transcript row, which
|
||||
# ``hermes_state.get_messages`` carries on every persisted message. This
|
||||
# handles the two auto-continue cases uniformly:
|
||||
# * resume_pending (gateway restart/shutdown watchdog marked the session)
|
||||
# * tool-tail (last persisted message is a tool result the agent
|
||||
# never got to reply to)
|
||||
# In both cases "when did we last do anything on this transcript" is the
|
||||
# correct freshness question, so one signal replaces two divergent ones.
|
||||
#
|
||||
# Default window: 1 hour. This comfortably covers ``agent.gateway_timeout``
|
||||
# (30 min default) plus runtime slack — a legitimate long-running turn that
|
||||
# gets interrupted near its timeout boundary and is resumed shortly after
|
||||
# is still classified fresh. Override via
|
||||
# ``config.yaml`` ``agent.gateway_auto_continue_freshness``.
|
||||
_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT = 60 * 60
|
||||
|
||||
|
||||
def _coerce_gateway_timestamp(value: Any) -> Optional[float]:
|
||||
|
|
@ -51,11 +66,16 @@ def _coerce_gateway_timestamp(value: Any) -> Optional[float]:
|
|||
|
||||
Missing/unparseable timestamps return None so legacy transcripts keep the
|
||||
historical auto-continue behaviour instead of being silently dropped.
|
||||
Accepts: datetime, epoch seconds (int/float), epoch milliseconds (when
|
||||
the magnitude exceeds year-2286), ISO-8601 strings (with or without a
|
||||
trailing ``Z``), and numeric strings.
|
||||
"""
|
||||
if value is None:
|
||||
return None
|
||||
if isinstance(value, datetime):
|
||||
return value.timestamp()
|
||||
if isinstance(value, bool): # bool is a subclass of int — skip it
|
||||
return None
|
||||
if isinstance(value, (int, float)):
|
||||
# Some platform events use milliseconds; Hermes state rows use seconds.
|
||||
return float(value) / 1000.0 if float(value) > 10_000_000_000 else float(value)
|
||||
|
|
@ -75,17 +95,77 @@ def _coerce_gateway_timestamp(value: Any) -> Optional[float]:
|
|||
return None
|
||||
|
||||
|
||||
def _is_fresh_gateway_interruption(value: Any, *, now: Optional[float] = None) -> bool:
|
||||
def _auto_continue_freshness_window() -> float:
    """Return the configured auto-continue freshness window in seconds.

    Reads ``HERMES_AUTO_CONTINUE_FRESHNESS`` (bridged from
    ``config.yaml`` ``agent.gateway_auto_continue_freshness`` at gateway
    startup, same pattern as ``HERMES_AGENT_TIMEOUT``).

    Returns:
        The parsed value as a float.  Falls back to
        ``_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT`` when the variable is
        unset, empty, not parseable as a float, or parses to NaN.
        Non-positive values are returned unchanged; they disable the
        freshness gate (restoring the pre-fix "always fresh" behaviour
        for users who want to opt out).
    """
    default = float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
    raw = os.environ.get("HERMES_AUTO_CONTINUE_FRESHNESS")
    if raw is None or raw == "":
        return default
    try:
        window = float(raw)
    except (TypeError, ValueError):
        return default
    # float() happily parses "nan".  NaN compares False against everything,
    # so a NaN window would neither disable the gate (``window <= 0``) nor
    # ever satisfy ``age <= window`` — every timestamped transcript would be
    # classified stale.  Treat it as malformed instead.
    if window != window:
        return default
    return window
|
||||
|
||||
|
||||
def _is_fresh_gateway_interruption(
    value: Any,
    *,
    now: Optional[float] = None,
    window_secs: Optional[float] = None,
) -> bool:
    """Return True when an interruption marker is fresh enough to auto-continue.

    Unknown timestamps are treated as fresh for backward compatibility with
    legacy transcripts (pre-dating timestamp persistence) and with in-memory
    test scaffolding that constructs history entries without timestamps.

    A non-positive ``window_secs`` disables the gate (always fresh), which
    restores the pre-fix behaviour for users who opt out via config.

    Args:
        value: Raw timestamp in any form ``_coerce_gateway_timestamp``
            accepts; anything it cannot parse counts as "unknown".
        now: Reference time in epoch seconds.  Defaults to ``time.time()``.
        window_secs: Maximum allowed age in seconds.  ``None`` selects
            ``_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT``.

    Returns:
        True if the gate is disabled, the timestamp is unknown, or the
        marker's age is within the window (boundary inclusive).
    """
    if window_secs is None:
        max_age = float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT)
    else:
        max_age = float(window_secs)
    # Check the opt-out first so a disabled gate never depends on the
    # timestamp being parseable.
    if max_age <= 0:
        return True

    marked_at = _coerce_gateway_timestamp(value)
    if marked_at is None:
        return True

    reference = now if now is not None else time.time()
    return reference - marked_at <= max_age
|
||||
|
||||
|
||||
def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any:
    """Return the ``timestamp`` of the last usable transcript row, if any.

    Walks ``history`` from the end, ignoring non-dict entries, rows with
    no role, and metadata rows (``session_meta`` / ``system``).  The first
    remaining row decides the result: its ``timestamp`` value is returned
    as-is, or ``None`` when that row has none (legacy transcript row).
    ``None`` is also returned for an empty or missing history.  Callers
    should treat ``None`` as "fresh" for backward compatibility.
    """
    ignored_roles = ("session_meta", "system")
    for row in reversed(history or ()):
        if isinstance(row, dict):
            role = row.get("role")
            if role and role not in ignored_roles:
                # Only the newest usable row counts; older rows are never
                # consulted even if this one carries no timestamp.
                return row.get("timestamp")
    return None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
@ -260,6 +340,13 @@ if _config_path.exists():
|
|||
os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"])
|
||||
if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ:
|
||||
os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"])
|
||||
if (
|
||||
"gateway_auto_continue_freshness" in _agent_cfg
|
||||
and "HERMES_AUTO_CONTINUE_FRESHNESS" not in os.environ
|
||||
):
|
||||
os.environ["HERMES_AUTO_CONTINUE_FRESHNESS"] = str(
|
||||
_agent_cfg["gateway_auto_continue_freshness"]
|
||||
)
|
||||
_display_cfg = _cfg.get("display", {})
|
||||
if _display_cfg and isinstance(_display_cfg, dict):
|
||||
if "busy_input_mode" in _display_cfg and "HERMES_GATEWAY_BUSY_INPUT_MODE" not in os.environ:
|
||||
|
|
@ -10554,26 +10641,38 @@ class GatewayRunner:
|
|||
# anything (tool, assistant with unfinished work, etc.), so we
|
||||
# give a stronger, reason-aware instruction that subsumes the
|
||||
# tool-tail case.
|
||||
#
|
||||
# Freshness gate (#16802): both branches are gated on the age
|
||||
# of the last persisted transcript row. That is the correct
|
||||
# "when did we last do anything here" signal for both the
|
||||
# resume_pending path (restart watchdog) and the tool-tail
|
||||
# path (in-flight tool loop killed). We read the raw ``history``
|
||||
# here because ``agent_history`` has already stripped the
|
||||
# ``timestamp`` field off tool/tool_call rows for API purity
|
||||
# (see the `k != "timestamp"` filter above). Rows without a
|
||||
# timestamp (legacy transcripts) are treated as fresh so the
|
||||
# historical auto-continue behaviour is preserved.
|
||||
_freshness_window = _auto_continue_freshness_window()
|
||||
_interruption_is_fresh = _is_fresh_gateway_interruption(
|
||||
_last_transcript_timestamp(history),
|
||||
window_secs=_freshness_window,
|
||||
)
|
||||
|
||||
_resume_entry = None
|
||||
if session_key:
|
||||
try:
|
||||
_resume_entry = self.session_store._entries.get(session_key)
|
||||
except Exception:
|
||||
_resume_entry = None
|
||||
_resume_marked_at = (
|
||||
getattr(_resume_entry, "last_resume_marked_at", None)
|
||||
if _resume_entry is not None
|
||||
else None
|
||||
)
|
||||
_is_resume_pending = bool(
|
||||
_resume_entry is not None
|
||||
and getattr(_resume_entry, "resume_pending", False)
|
||||
and _is_fresh_gateway_interruption(_resume_marked_at)
|
||||
and _interruption_is_fresh
|
||||
)
|
||||
_has_fresh_tool_tail = bool(
|
||||
agent_history
|
||||
and agent_history[-1].get("role") == "tool"
|
||||
and _is_fresh_gateway_interruption(agent_history[-1].get("timestamp"))
|
||||
and _interruption_is_fresh
|
||||
)
|
||||
|
||||
if _is_resume_pending:
|
||||
|
|
|
|||
|
|
@ -411,6 +411,20 @@ DEFAULT_CONFIG = {
|
|||
# (60+ tool iterations with tiny output) before users assume the
|
||||
# bot is dead and /restart.
|
||||
"gateway_notify_interval": 180,
|
||||
# Freshness window for the gateway auto-continue note (seconds).
|
||||
# After a gateway crash/restart/SIGTERM mid-run, the next user
|
||||
# message gets a "[System note: your previous turn was
|
||||
# interrupted — process the unfinished tool result(s) first]"
|
||||
# prepended so the model picks up where it left off. That's the
|
||||
# right behaviour while the interruption is fresh, but stale
|
||||
# markers (transcript last touched hours or days ago) can revive
|
||||
# an unrelated old task when the user's next message starts new
|
||||
# work. This window is the max age of the last persisted
|
||||
# transcript row for which we still inject the continue note.
|
||||
# Default 3600s comfortably covers a long turn (gateway_timeout
|
||||
# default is 1800s) plus runtime slack. Set to 0 to disable the
|
||||
# gate and restore pre-fix behaviour (always inject).
|
||||
"gateway_auto_continue_freshness": 3600,
|
||||
# How user-attached images are presented to the main model on each turn.
|
||||
# "auto" — attach natively when the active model reports
|
||||
# supports_vision=True AND the user hasn't explicitly
|
||||
|
|
|
|||
|
|
@ -58,6 +58,7 @@ AUTHOR_MAP = {
|
|||
"yes999zc@163.com": "yes999zc",
|
||||
"343873859@qq.com": "DrStrangerUJN",
|
||||
"uzmpsk.dilekakbas@gmail.com": "dlkakbs",
|
||||
"beliefanx@gmail.com": "BeliefanX",
|
||||
"jefferson@heimdallstrategy.com": "Mind-Dragon",
|
||||
"steve.westerhouse@origami-analytics.com": "westers",
|
||||
"130918800+devorun@users.noreply.github.com": "devorun",
|
||||
|
|
|
|||
|
|
@ -33,7 +33,12 @@ from unittest.mock import AsyncMock, MagicMock, patch
|
|||
import pytest
|
||||
|
||||
from gateway.config import GatewayConfig, Platform, PlatformConfig
|
||||
from gateway.run import _is_fresh_gateway_interruption
|
||||
from gateway.run import (
|
||||
_auto_continue_freshness_window,
|
||||
_coerce_gateway_timestamp,
|
||||
_is_fresh_gateway_interruption,
|
||||
_last_transcript_timestamp,
|
||||
)
|
||||
from gateway.session import SessionEntry, SessionSource, SessionStore
|
||||
from tests.gateway.restart_test_helpers import (
|
||||
make_restart_runner,
|
||||
|
|
@ -54,28 +59,69 @@ def _make_store(tmp_path):
|
|||
return SessionStore(sessions_dir=tmp_path, config=GatewayConfig())
|
||||
|
||||
|
||||
def _build_agent_history(history: list) -> list:
    """Mirror gateway/run.py's ``history → agent_history`` conversion.

    Rows with no role, and ``session_meta`` / ``system`` rows, are dropped.
    Tool-related rows (role ``tool``, or carrying ``tool_calls`` /
    ``tool_call_id``) are copied with the ``timestamp`` key removed.  Any
    other row is reduced to ``{"role", "content"}`` and kept only when its
    content is truthy.

    Tests that check the freshness gate must go through this conversion so
    they exercise the *real* data the note-injection code sees.
    """
    converted: list = []
    for row in history:
        role = row.get("role")
        if not role or role in ("session_meta", "system"):
            continue

        tool_related = (
            role == "tool" or "tool_calls" in row or "tool_call_id" in row
        )
        if tool_related:
            # Copy first so the caller's raw transcript row keeps its timestamp.
            stripped = dict(row)
            stripped.pop("timestamp", None)
            converted.append(stripped)
            continue

        text = row.get("content")
        if text:
            converted.append({"role": role, "content": text})
    return converted
|
||||
|
||||
|
||||
def _simulate_note_injection(
|
||||
agent_history: list,
|
||||
history: list,
|
||||
user_message: str,
|
||||
resume_entry: SessionEntry | None,
|
||||
*,
|
||||
agent_history: list | None = None,
|
||||
window_secs: float | None = None,
|
||||
) -> str:
|
||||
"""Mirror the note-injection logic in gateway/run.py _run_agent().
|
||||
|
||||
Matches the production code in the ``run_sync`` closure so we can
|
||||
test the decision tree without a full gateway runner.
|
||||
The freshness signal reads ``history[-1].timestamp`` (the raw transcript
|
||||
row), NOT ``agent_history[-1].timestamp`` (which has been stripped).
|
||||
Tests pass the raw ``history`` — ``agent_history`` is derived from it
|
||||
via the real conversion if not supplied explicitly.
|
||||
"""
|
||||
if agent_history is None:
|
||||
agent_history = _build_agent_history(history)
|
||||
|
||||
window = (
|
||||
float(window_secs)
|
||||
if window_secs is not None
|
||||
else _auto_continue_freshness_window()
|
||||
)
|
||||
interruption_is_fresh = _is_fresh_gateway_interruption(
|
||||
_last_transcript_timestamp(history),
|
||||
window_secs=window,
|
||||
)
|
||||
|
||||
message = user_message
|
||||
is_resume_pending = bool(
|
||||
resume_entry is not None
|
||||
and getattr(resume_entry, "resume_pending", False)
|
||||
and _is_fresh_gateway_interruption(
|
||||
getattr(resume_entry, "last_resume_marked_at", None)
|
||||
)
|
||||
and interruption_is_fresh
|
||||
)
|
||||
has_fresh_tool_tail = bool(
|
||||
agent_history
|
||||
and agent_history[-1].get("role") == "tool"
|
||||
and _is_fresh_gateway_interruption(agent_history[-1].get("timestamp"))
|
||||
and interruption_is_fresh
|
||||
)
|
||||
|
||||
if is_resume_pending:
|
||||
|
|
@ -366,7 +412,9 @@ class TestResumePendingSystemNote:
|
|||
def test_resume_pending_restart_note_mentions_restart(self):
|
||||
entry = self._pending_entry(reason="restart_timeout")
|
||||
result = _simulate_note_injection(
|
||||
agent_history=[{"role": "assistant", "content": "in progress"}],
|
||||
history=[
|
||||
{"role": "assistant", "content": "in progress", "timestamp": time.time()},
|
||||
],
|
||||
user_message="what happened?",
|
||||
resume_entry=entry,
|
||||
)
|
||||
|
|
@ -377,7 +425,9 @@ class TestResumePendingSystemNote:
|
|||
def test_resume_pending_shutdown_note_mentions_shutdown(self):
|
||||
entry = self._pending_entry(reason="shutdown_timeout")
|
||||
result = _simulate_note_injection(
|
||||
agent_history=[{"role": "assistant", "content": "in progress"}],
|
||||
history=[
|
||||
{"role": "assistant", "content": "in progress", "timestamp": time.time()},
|
||||
],
|
||||
user_message="ping",
|
||||
resume_entry=entry,
|
||||
)
|
||||
|
|
@ -388,8 +438,8 @@ class TestResumePendingSystemNote:
|
|||
even when the transcript's last role is NOT ``tool``."""
|
||||
entry = self._pending_entry()
|
||||
history = [
|
||||
{"role": "user", "content": "run a long thing"},
|
||||
{"role": "assistant", "content": "ok, starting..."},
|
||||
{"role": "user", "content": "run a long thing", "timestamp": time.time() - 10},
|
||||
{"role": "assistant", "content": "ok, starting...", "timestamp": time.time()},
|
||||
]
|
||||
result = _simulate_note_injection(history, "ping", resume_entry=entry)
|
||||
assert "[System note:" in result
|
||||
|
|
@ -402,8 +452,9 @@ class TestResumePendingSystemNote:
|
|||
history = [
|
||||
{"role": "assistant", "content": None, "tool_calls": [
|
||||
{"id": "c1", "function": {"name": "x", "arguments": "{}"}},
|
||||
]},
|
||||
{"role": "tool", "tool_call_id": "c1", "content": "result"},
|
||||
], "timestamp": time.time() - 1},
|
||||
{"role": "tool", "tool_call_id": "c1", "content": "result",
|
||||
"timestamp": time.time()},
|
||||
]
|
||||
result = _simulate_note_injection(history, "ping", resume_entry=entry)
|
||||
assert result.count("[System note:") == 1
|
||||
|
|
@ -416,31 +467,41 @@ class TestResumePendingSystemNote:
|
|||
history = [
|
||||
{"role": "assistant", "content": None, "tool_calls": [
|
||||
{"id": "c1", "function": {"name": "x", "arguments": "{}"}},
|
||||
]},
|
||||
{"role": "tool", "tool_call_id": "c1", "content": "result"},
|
||||
], "timestamp": time.time() - 1},
|
||||
{"role": "tool", "tool_call_id": "c1", "content": "result",
|
||||
"timestamp": time.time()},
|
||||
]
|
||||
result = _simulate_note_injection(history, "ping", resume_entry=None)
|
||||
assert "[System note:" in result
|
||||
assert "tool result" in result
|
||||
|
||||
def test_stale_resume_pending_does_not_inject_restart_note(self):
    """Old restart markers must not revive an unrelated stale task.

    Both the resume marker and the transcript's last row are one hour
    old.  That sits right at the edge of the default 1h freshness window,
    so the fixture passes ``window_secs=1800``: the row is then twice as
    old as the window and unambiguously stale, without tying the test to
    the production default.
    """
    stale_entry = self._pending_entry()
    stale_entry.last_resume_marked_at = datetime.now() - timedelta(hours=1)

    last_row = {
        "role": "assistant",
        "content": "old in progress",
        "timestamp": time.time() - 3600,
    }

    injected = _simulate_note_injection(
        history=[last_row],
        user_message="start a new task",
        resume_entry=stale_entry,
        window_secs=1800,
    )

    # The user's message must pass through untouched — no system note.
    assert injected == "start a new task"
|
||||
|
||||
def test_fresh_tool_tail_preserves_auto_continue_note(self):
|
||||
history = [
|
||||
{"role": "assistant", "content": None, "tool_calls": [
|
||||
{"id": "c1", "function": {"name": "x", "arguments": "{}"}},
|
||||
]},
|
||||
], "timestamp": time.time() - 1},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "c1",
|
||||
|
|
@ -453,10 +514,15 @@ class TestResumePendingSystemNote:
|
|||
assert "tool result" in result
|
||||
|
||||
def test_stale_tool_tail_does_not_inject_auto_continue_note(self):
|
||||
"""The core bug fix: stale tool-tail must not revive a dead task.
|
||||
|
||||
Uses window_secs=1800 (30 min) to verify the gate fires at 1h —
|
||||
keeps the test stable regardless of the production default.
|
||||
"""
|
||||
history = [
|
||||
{"role": "assistant", "content": None, "tool_calls": [
|
||||
{"id": "c1", "function": {"name": "x", "arguments": "{}"}},
|
||||
]},
|
||||
], "timestamp": time.time() - 3601},
|
||||
{
|
||||
"role": "tool",
|
||||
"tool_call_id": "c1",
|
||||
|
|
@ -464,18 +530,210 @@ class TestResumePendingSystemNote:
|
|||
"timestamp": time.time() - 3600,
|
||||
},
|
||||
]
|
||||
result = _simulate_note_injection(history, "start a new task", resume_entry=None)
|
||||
result = _simulate_note_injection(
|
||||
history,
|
||||
"start a new task",
|
||||
resume_entry=None,
|
||||
window_secs=1800,
|
||||
)
|
||||
assert result == "start a new task"
|
||||
|
||||
def test_stale_tool_tail_with_production_data_shape(self):
    """Regression guard for #16802: exercise the REAL production path
    where ``agent_history`` has been stripped of timestamps.

    The original PR #16802 fix read ``agent_history[-1].get("timestamp")``
    — which is always ``None`` at runtime because the gateway strips
    ``timestamp`` off tool/tool_call rows in ``history → agent_history``.
    This test builds a stale history, runs it through the real
    ``_build_agent_history`` conversion, then asserts:

    1. The stripped ``agent_history`` carries NO timestamp (protects
       against someone "fixing" the original PR by re-adding the
       stripped field — which would break the API contract).
    2. The freshness gate still correctly classifies the transcript
       as stale because the signal is read from ``history`` BEFORE
       the strip.
    3. No auto-continue note is injected.

    The window is pinned to 3600s explicitly.  Without ``window_secs``
    the helper falls back to ``_auto_continue_freshness_window()``, which
    reads ``HERMES_AUTO_CONTINUE_FRESHNESS`` — so an ambient override
    (e.g. ``0`` to disable the gate) would make this test fail for
    reasons unrelated to the regression it guards.
    """
    history = [
        {"role": "assistant", "content": None, "tool_calls": [
            {"id": "c1", "function": {"name": "x", "arguments": "{}"}},
        ], "timestamp": time.time() - 7201},
        {
            "role": "tool",
            "tool_call_id": "c1",
            "content": "stale result",
            "timestamp": time.time() - 7200,  # 2 hours old
        },
    ]
    agent_history = _build_agent_history(history)

    # Invariant 1: strip contract preserved
    assert agent_history[-1]["role"] == "tool"
    assert "timestamp" not in agent_history[-1], (
        "agent_history tool rows must NOT carry a timestamp — the "
        "freshness gate must read from raw history, not agent_history"
    )

    # Invariant 2+3: stale classification, no note injection
    result = _simulate_note_injection(
        history,
        "start a new task",
        resume_entry=None,
        agent_history=agent_history,
        window_secs=3600,
    )
    assert result == "start a new task"
|
||||
|
||||
def test_freshness_gate_disabled_via_zero_window(self):
    """window_secs=0 restores pre-fix behaviour (always inject).

    The tool-tail transcript is a full day old, yet the continue note
    must still be prepended because the gate is switched off.
    """
    tool_call_row = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {"id": "c1", "function": {"name": "x", "arguments": "{}"}},
        ],
        "timestamp": time.time() - 86400,
    }
    tool_result_row = {
        "role": "tool",
        "tool_call_id": "c1",
        "content": "day-old result",
        "timestamp": time.time() - 86400,  # 24 hours old
    }

    note = _simulate_note_injection(
        [tool_call_row, tool_result_row],
        "ping",
        resume_entry=None,
        window_secs=0,
    )

    for fragment in ("[System note:", "tool result"):
        assert fragment in note
|
||||
|
||||
def test_legacy_history_without_timestamps_still_injects(self):
    """Transcripts predating timestamp persistence must keep the old
    behaviour — freshness unknown → treat as fresh.

    Neither row carries a ``timestamp`` key, so the tool-tail note is
    expected to be injected.
    """
    tool_call_row = {
        "role": "assistant",
        "content": None,
        "tool_calls": [
            {"id": "c1", "function": {"name": "x", "arguments": "{}"}},
        ],
    }
    tool_result_row = {"role": "tool", "tool_call_id": "c1", "content": "result"}

    note = _simulate_note_injection(
        [tool_call_row, tool_result_row], "ping", resume_entry=None,
    )

    for fragment in ("[System note:", "tool result"):
        assert fragment in note
|
||||
|
||||
def test_no_note_when_nothing_to_resume(self):
    """With no resume marker and no trailing tool row, the user's
    message passes through unchanged."""
    user_row = {"role": "user", "content": "hello", "timestamp": time.time() - 2}
    assistant_row = {"role": "assistant", "content": "hi", "timestamp": time.time() - 1}

    outcome = _simulate_note_injection(
        [user_row, assistant_row], "ping", resume_entry=None,
    )

    assert outcome == "ping"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Freshness helpers
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestFreshnessHelpers:
    """Unit tests for the timestamp-coercion, freshness, last-row and
    window-lookup helpers imported from ``gateway.run``."""

    # -- _coerce_gateway_timestamp -----------------------------------------

    def test_coerce_datetime(self):
        moment = datetime.now()
        coerced = _coerce_gateway_timestamp(moment)
        assert coerced == pytest.approx(moment.timestamp(), abs=1e-3)

    def test_coerce_epoch_seconds(self):
        cases = (
            (1_700_000_000, 1_700_000_000.0),
            (1_700_000_000.5, 1_700_000_000.5),
        )
        for raw, expected in cases:
            assert _coerce_gateway_timestamp(raw) == expected

    def test_coerce_epoch_milliseconds(self):
        # Magnitudes above 10^10 are interpreted as milliseconds.
        millis = 1_700_000_000_000
        assert _coerce_gateway_timestamp(millis) == 1_700_000_000.0

    def test_coerce_iso_string(self):
        stamp = "2026-04-18T12:00:00+00:00"
        want = datetime.fromisoformat(stamp).timestamp()
        assert _coerce_gateway_timestamp(stamp) == pytest.approx(want, abs=1e-3)

    def test_coerce_iso_string_with_z_suffix(self):
        want = datetime.fromisoformat("2026-04-18T12:00:00+00:00").timestamp()
        got = _coerce_gateway_timestamp("2026-04-18T12:00:00Z")
        assert got == pytest.approx(want, abs=1e-3)

    def test_coerce_numeric_string(self):
        assert _coerce_gateway_timestamp("1700000000") == 1_700_000_000.0

    def test_coerce_rejects_garbage(self):
        # True/False are listed explicitly: bool must be rejected.
        rejected = (None, "", "not-a-timestamp", True, False, [1, 2, 3])
        for junk in rejected:
            assert _coerce_gateway_timestamp(junk) is None

    # -- _is_fresh_gateway_interruption ------------------------------------

    def test_is_fresh_unknown_is_fresh(self):
        """Legacy-compat: unknown timestamp → fresh."""
        for unknown in (None, "not-a-timestamp"):
            assert _is_fresh_gateway_interruption(unknown) is True

    def test_is_fresh_window_bounds(self):
        now = 1_700_000_000.0
        # (age in seconds, expected verdict) against a fixed 1h window.
        expectations = (
            (1800, True),    # 30min old → fresh
            (7200, False),   # 2h old → stale
            (3600, True),    # exactly at boundary → fresh (<=)
        )
        for age, expected in expectations:
            verdict = _is_fresh_gateway_interruption(
                now - age, now=now, window_secs=3600,
            )
            assert verdict is expected

    def test_is_fresh_zero_window_always_fresh(self):
        """Opt-out: window_secs=0 disables the gate entirely."""
        now = 1_700_000_000.0
        for stamp, window in ((0.0, 0), (-1.0, -5)):
            verdict = _is_fresh_gateway_interruption(
                stamp, now=now, window_secs=window,
            )
            assert verdict is True

    # -- _last_transcript_timestamp ----------------------------------------

    def test_last_transcript_timestamp_skips_meta(self):
        transcript = [
            {"role": "user", "content": "hi", "timestamp": 100.0},
            {"role": "assistant", "content": "hey", "timestamp": 200.0},
            {"role": "session_meta", "content": "tools:{}", "timestamp": 999.0},
            {"role": "system", "content": "ignore", "timestamp": 999.0},
        ]
        assert _last_transcript_timestamp(transcript) == 200.0

    def test_last_transcript_timestamp_empty(self):
        for nothing in ([], None):
            assert _last_transcript_timestamp(nothing) is None

    def test_last_transcript_timestamp_row_without_timestamp(self):
        """Legacy transcript row (no timestamp) returns None → caller
        treats as fresh."""
        transcript = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hey"},
        ]
        assert _last_transcript_timestamp(transcript) is None

    # -- _auto_continue_freshness_window -----------------------------------

    def test_auto_continue_freshness_window_reads_env(self, monkeypatch):
        monkeypatch.setenv("HERMES_AUTO_CONTINUE_FRESHNESS", "7200")
        configured = _auto_continue_freshness_window()
        assert configured == 7200.0

    def test_auto_continue_freshness_window_default_when_unset(self, monkeypatch):
        monkeypatch.delenv("HERMES_AUTO_CONTINUE_FRESHNESS", raising=False)
        # Default is 1 hour
        fallback = _auto_continue_freshness_window()
        assert fallback == 3600.0

    def test_auto_continue_freshness_window_malformed_falls_back(self, monkeypatch):
        monkeypatch.setenv("HERMES_AUTO_CONTINUE_FRESHNESS", "not-a-number")
        fallback = _auto_continue_freshness_window()
        assert fallback == 3600.0

    def test_auto_continue_freshness_window_empty_falls_back(self, monkeypatch):
        monkeypatch.setenv("HERMES_AUTO_CONTINUE_FRESHNESS", "")
        fallback = _auto_continue_freshness_window()
        assert fallback == 3600.0
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Drain-timeout path marks sessions resume_pending
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue