From 7444e49d4e5888d5d89f1ef01761fec838d10419 Mon Sep 17 00:00:00 2001 From: teknium1 Date: Tue, 28 Apr 2026 05:00:26 -0700 Subject: [PATCH] fix(gateway): use transcript timestamp for auto-continue freshness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Follow-up to PR #16802 (BeliefanX). The original fix read `agent_history[-1].get("timestamp")` for the tool-tail freshness gate, but `gateway/run.py` strips the `timestamp` field off all tool/tool_call rows when building `agent_history` from the raw transcript (see `clean_msg = {k: v for k, v in msg.items() if k != "timestamp"}`). At runtime the tool-tail branch always saw `None` and silently took the legacy-fresh path — the stale-guard never fired for the tool-tail case it was supposed to cover. Changes: - Read the freshness signal from the RAW `history` list (via new `_last_transcript_timestamp()` helper) BEFORE the strip. Both the resume_pending branch and the tool-tail branch use this single signal, replacing the two divergent ones. - Default window bumped 15 min → 1 hour via new `_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT`. The 15-minute default was shorter than the default `gateway_timeout` of 30 min, so a legitimate long-running turn interrupted near its timeout boundary and resumed shortly after would have been misclassified as stale. - Configurable via `config.yaml` `agent.gateway_auto_continue_freshness` (bridged to `HERMES_AUTO_CONTINUE_FRESHNESS` at gateway startup — same pattern as `gateway_timeout`). Set to 0 to disable the gate. - `_coerce_gateway_timestamp` now explicitly rejects bool (which is a subclass of int and would otherwise coerce to 0.0/1.0). - Tests rewritten to exercise the real production data shape: raw `history` → `_build_agent_history` strip → freshness decision. 
A regression guard (`test_stale_tool_tail_with_production_data_shape`) asserts `agent_history` tool rows carry NO timestamp, protecting against someone "fixing" the original bug by re-adding the stripped field (which would break the OpenAI tool-result message contract). Add BeliefanX to scripts/release.py AUTHOR_MAP. E2E verified: config.yaml → env var bridge → helper returns configured value; default 1h window; malformed/empty env var falls back to default; ISO-Z timestamps parse; ms-epoch coerced; bool rejected. --- gateway/run.py | 121 +++++++- hermes_cli/config.py | 14 + scripts/release.py | 1 + tests/gateway/test_restart_resume_pending.py | 306 +++++++++++++++++-- 4 files changed, 407 insertions(+), 35 deletions(-) diff --git a/gateway/run.py b/gateway/run.py index edd33c8709..e573514ecf 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -43,7 +43,22 @@ _AGENT_CACHE_IDLE_TTL_SECS = 3600.0 # evict agents idle for >1h # Only auto-continue interrupted gateway turns while the interruption is fresh. # Stale tool-tail/resume markers can otherwise revive an unrelated old task # after a gateway restart when the user's next message starts new work. -_AUTO_CONTINUE_FRESHNESS_SECS = 15 * 60 +# +# The freshness signal is the timestamp of the last transcript row, which +# ``hermes_state.get_messages`` carries on every persisted message. This +# handles the two auto-continue cases uniformly: +# * resume_pending (gateway restart/shutdown watchdog marked the session) +# * tool-tail (last persisted message is a tool result the agent +# never got to reply to) +# In both cases "when did we last do anything on this transcript" is the +# correct freshness question, so one signal replaces two divergent ones. +# +# Default window: 1 hour. This comfortably covers ``agent.gateway_timeout`` +# (30 min default) plus runtime slack — a legitimate long-running turn that +# gets interrupted near its timeout boundary and is resumed shortly after +# is still classified fresh. 
Override via +# ``config.yaml`` ``agent.gateway_auto_continue_freshness``. +_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT = 60 * 60 def _coerce_gateway_timestamp(value: Any) -> Optional[float]: @@ -51,11 +66,16 @@ def _coerce_gateway_timestamp(value: Any) -> Optional[float]: Missing/unparseable timestamps return None so legacy transcripts keep the historical auto-continue behaviour instead of being silently dropped. + Accepts: datetime, epoch seconds (int/float), epoch milliseconds (when + the magnitude exceeds year-2286), ISO-8601 strings (with or without a + trailing ``Z``), and numeric strings. """ if value is None: return None if isinstance(value, datetime): return value.timestamp() + if isinstance(value, bool): # bool is a subclass of int — skip it + return None if isinstance(value, (int, float)): # Some platform events use milliseconds; Hermes state rows use seconds. return float(value) / 1000.0 if float(value) > 10_000_000_000 else float(value) @@ -75,17 +95,77 @@ def _coerce_gateway_timestamp(value: Any) -> Optional[float]: return None -def _is_fresh_gateway_interruption(value: Any, *, now: Optional[float] = None) -> bool: +def _auto_continue_freshness_window() -> float: + """Return the configured auto-continue freshness window in seconds. + + Reads ``HERMES_AUTO_CONTINUE_FRESHNESS`` (bridged from + ``config.yaml`` ``agent.gateway_auto_continue_freshness`` at gateway + startup, same pattern as ``HERMES_AGENT_TIMEOUT``). Falls back to the + module default when unset or malformed. Non-positive values disable + the freshness gate (restores the pre-fix "always fresh" behaviour for + users who want to opt out). 
+ """ + raw = os.environ.get("HERMES_AUTO_CONTINUE_FRESHNESS") + if raw is None or raw == "": + return float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT) + try: + return float(raw) + except (TypeError, ValueError): + return float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT) + + +def _is_fresh_gateway_interruption( + value: Any, + *, + now: Optional[float] = None, + window_secs: Optional[float] = None, +) -> bool: """Return True when an interruption marker is fresh enough to auto-continue. Unknown timestamps are treated as fresh for backward compatibility with - legacy transcripts/tests that predate timestamp persistence. + legacy transcripts (pre-dating timestamp persistence) and with in-memory + test scaffolding that constructs history entries without timestamps. + + A non-positive ``window_secs`` disables the gate (always fresh), which + restores the pre-fix behaviour for users who opt out via config. """ + window = ( + float(window_secs) + if window_secs is not None + else float(_AUTO_CONTINUE_FRESHNESS_SECS_DEFAULT) + ) + if window <= 0: + return True timestamp = _coerce_gateway_timestamp(value) if timestamp is None: return True current = time.time() if now is None else now - return current - timestamp <= _AUTO_CONTINUE_FRESHNESS_SECS + return current - timestamp <= window + + +def _last_transcript_timestamp(history: Optional[List[Dict[str, Any]]]) -> Any: + """Return the ``timestamp`` of the last usable transcript row, if any. + + Skips metadata-only rows (``session_meta``, system injections) that are + dropped before being handed to the agent. Returns ``None`` when no + usable row carries a timestamp — callers should treat that as "fresh" + for backward compatibility. 
+ """ + if not history: + return None + for msg in reversed(history): + if not isinstance(msg, dict): + continue + role = msg.get("role") + if not role or role in ("session_meta", "system"): + continue + ts = msg.get("timestamp") + if ts is not None: + return ts + # First non-meta row without a timestamp — legacy transcript row. + # Returning None lets the caller fall through to the legacy-fresh path. + return None + return None # --------------------------------------------------------------------------- @@ -260,6 +340,13 @@ if _config_path.exists(): os.environ["HERMES_AGENT_NOTIFY_INTERVAL"] = str(_agent_cfg["gateway_notify_interval"]) if "restart_drain_timeout" in _agent_cfg and "HERMES_RESTART_DRAIN_TIMEOUT" not in os.environ: os.environ["HERMES_RESTART_DRAIN_TIMEOUT"] = str(_agent_cfg["restart_drain_timeout"]) + if ( + "gateway_auto_continue_freshness" in _agent_cfg + and "HERMES_AUTO_CONTINUE_FRESHNESS" not in os.environ + ): + os.environ["HERMES_AUTO_CONTINUE_FRESHNESS"] = str( + _agent_cfg["gateway_auto_continue_freshness"] + ) _display_cfg = _cfg.get("display", {}) if _display_cfg and isinstance(_display_cfg, dict): if "busy_input_mode" in _display_cfg and "HERMES_GATEWAY_BUSY_INPUT_MODE" not in os.environ: @@ -10554,26 +10641,38 @@ class GatewayRunner: # anything (tool, assistant with unfinished work, etc.), so we # give a stronger, reason-aware instruction that subsumes the # tool-tail case. + # + # Freshness gate (#16802): both branches are gated on the age + # of the last persisted transcript row. That is the correct + # "when did we last do anything here" signal for both the + # resume_pending path (restart watchdog) and the tool-tail + # path (in-flight tool loop killed). We read ``history[-1]`` + # here because ``agent_history`` has already stripped the + # ``timestamp`` field off tool/tool_call rows for API purity + # (see the `k != "timestamp"` filter above). 
Rows without a + # timestamp (legacy transcripts) are treated as fresh so the + # historical auto-continue behaviour is preserved. + _freshness_window = _auto_continue_freshness_window() + _interruption_is_fresh = _is_fresh_gateway_interruption( + _last_transcript_timestamp(history), + window_secs=_freshness_window, + ) + _resume_entry = None if session_key: try: _resume_entry = self.session_store._entries.get(session_key) except Exception: _resume_entry = None - _resume_marked_at = ( - getattr(_resume_entry, "last_resume_marked_at", None) - if _resume_entry is not None - else None - ) _is_resume_pending = bool( _resume_entry is not None and getattr(_resume_entry, "resume_pending", False) - and _is_fresh_gateway_interruption(_resume_marked_at) + and _interruption_is_fresh ) _has_fresh_tool_tail = bool( agent_history and agent_history[-1].get("role") == "tool" - and _is_fresh_gateway_interruption(agent_history[-1].get("timestamp")) + and _interruption_is_fresh ) if _is_resume_pending: diff --git a/hermes_cli/config.py b/hermes_cli/config.py index ddb3dec0ac..210cb12d6f 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -411,6 +411,20 @@ DEFAULT_CONFIG = { # (60+ tool iterations with tiny output) before users assume the # bot is dead and /restart. "gateway_notify_interval": 180, + # Freshness window for the gateway auto-continue note (seconds). + # After a gateway crash/restart/SIGTERM mid-run, the next user + # message gets a "[System note: your previous turn was + # interrupted — process the unfinished tool result(s) first]" + # prepended so the model picks up where it left off. That's the + # right behaviour while the interruption is fresh, but stale + # markers (transcript last touched hours or days ago) can revive + # an unrelated old task when the user's next message starts new + # work. This window is the max age of the last persisted + # transcript row for which we still inject the continue note. 
+ # Default 3600s comfortably covers a long turn (gateway_timeout + # default is 1800s) plus runtime slack. Set to 0 to disable the + # gate and restore pre-fix behaviour (always inject). + "gateway_auto_continue_freshness": 3600, # How user-attached images are presented to the main model on each turn. # "auto" — attach natively when the active model reports # supports_vision=True AND the user hasn't explicitly diff --git a/scripts/release.py b/scripts/release.py index ed8be331ce..5c1503da2f 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -58,6 +58,7 @@ AUTHOR_MAP = { "yes999zc@163.com": "yes999zc", "343873859@qq.com": "DrStrangerUJN", "uzmpsk.dilekakbas@gmail.com": "dlkakbs", + "beliefanx@gmail.com": "BeliefanX", "jefferson@heimdallstrategy.com": "Mind-Dragon", "steve.westerhouse@origami-analytics.com": "westers", "130918800+devorun@users.noreply.github.com": "devorun", diff --git a/tests/gateway/test_restart_resume_pending.py b/tests/gateway/test_restart_resume_pending.py index 942d8d238d..b8937cd4df 100644 --- a/tests/gateway/test_restart_resume_pending.py +++ b/tests/gateway/test_restart_resume_pending.py @@ -33,7 +33,12 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest from gateway.config import GatewayConfig, Platform, PlatformConfig -from gateway.run import _is_fresh_gateway_interruption +from gateway.run import ( + _auto_continue_freshness_window, + _coerce_gateway_timestamp, + _is_fresh_gateway_interruption, + _last_transcript_timestamp, +) from gateway.session import SessionEntry, SessionSource, SessionStore from tests.gateway.restart_test_helpers import ( make_restart_runner, @@ -54,28 +59,69 @@ def _make_store(tmp_path): return SessionStore(sessions_dir=tmp_path, config=GatewayConfig()) +def _build_agent_history(history: list) -> list: + """Mirror gateway/run.py's ``history → agent_history`` conversion. + + This is the transformation that strips ``timestamp`` off tool/tool_call + rows before the agent sees them. 
Tests that check the freshness gate + must go through this conversion so they exercise the *real* data the + note-injection code sees. + """ + agent_history: list = [] + for msg in history: + role = msg.get("role") + if not role or role in ("session_meta", "system"): + continue + has_tool_calls = "tool_calls" in msg + has_tool_call_id = "tool_call_id" in msg + is_tool_message = role == "tool" + if has_tool_calls or has_tool_call_id or is_tool_message: + agent_history.append({k: v for k, v in msg.items() if k != "timestamp"}) + else: + content = msg.get("content") + if content: + agent_history.append({"role": role, "content": content}) + return agent_history + + def _simulate_note_injection( - agent_history: list, + history: list, user_message: str, resume_entry: SessionEntry | None, + *, + agent_history: list | None = None, + window_secs: float | None = None, ) -> str: """Mirror the note-injection logic in gateway/run.py _run_agent(). - Matches the production code in the ``run_sync`` closure so we can - test the decision tree without a full gateway runner. + The freshness signal is read from the raw ``history`` (last non-meta + row, via ``_last_transcript_timestamp``), NOT from ``agent_history`` + (whose tool rows have had ``timestamp`` stripped). Tests pass the raw + ``history`` — ``agent_history`` is built by ``_build_agent_history`` if not supplied explicitly. 
""" + if agent_history is None: + agent_history = _build_agent_history(history) + + window = ( + float(window_secs) + if window_secs is not None + else _auto_continue_freshness_window() + ) + interruption_is_fresh = _is_fresh_gateway_interruption( + _last_transcript_timestamp(history), + window_secs=window, + ) + message = user_message is_resume_pending = bool( resume_entry is not None and getattr(resume_entry, "resume_pending", False) - and _is_fresh_gateway_interruption( - getattr(resume_entry, "last_resume_marked_at", None) - ) + and interruption_is_fresh ) has_fresh_tool_tail = bool( agent_history and agent_history[-1].get("role") == "tool" - and _is_fresh_gateway_interruption(agent_history[-1].get("timestamp")) + and interruption_is_fresh ) if is_resume_pending: @@ -366,7 +412,9 @@ class TestResumePendingSystemNote: def test_resume_pending_restart_note_mentions_restart(self): entry = self._pending_entry(reason="restart_timeout") result = _simulate_note_injection( - agent_history=[{"role": "assistant", "content": "in progress"}], + history=[ + {"role": "assistant", "content": "in progress", "timestamp": time.time()}, + ], user_message="what happened?", resume_entry=entry, ) @@ -377,7 +425,9 @@ class TestResumePendingSystemNote: def test_resume_pending_shutdown_note_mentions_shutdown(self): entry = self._pending_entry(reason="shutdown_timeout") result = _simulate_note_injection( - agent_history=[{"role": "assistant", "content": "in progress"}], + history=[ + {"role": "assistant", "content": "in progress", "timestamp": time.time()}, + ], user_message="ping", resume_entry=entry, ) @@ -388,8 +438,8 @@ class TestResumePendingSystemNote: even when the transcript's last role is NOT ``tool``.""" entry = self._pending_entry() history = [ - {"role": "user", "content": "run a long thing"}, - {"role": "assistant", "content": "ok, starting..."}, + {"role": "user", "content": "run a long thing", "timestamp": time.time() - 10}, + {"role": "assistant", "content": "ok, 
starting...", "timestamp": time.time()}, ] result = _simulate_note_injection(history, "ping", resume_entry=entry) assert "[System note:" in result @@ -402,8 +452,9 @@ class TestResumePendingSystemNote: history = [ {"role": "assistant", "content": None, "tool_calls": [ {"id": "c1", "function": {"name": "x", "arguments": "{}"}}, - ]}, - {"role": "tool", "tool_call_id": "c1", "content": "result"}, + ], "timestamp": time.time() - 1}, + {"role": "tool", "tool_call_id": "c1", "content": "result", + "timestamp": time.time()}, ] result = _simulate_note_injection(history, "ping", resume_entry=entry) assert result.count("[System note:") == 1 @@ -416,31 +467,41 @@ class TestResumePendingSystemNote: history = [ {"role": "assistant", "content": None, "tool_calls": [ {"id": "c1", "function": {"name": "x", "arguments": "{}"}}, - ]}, - {"role": "tool", "tool_call_id": "c1", "content": "result"}, + ], "timestamp": time.time() - 1}, + {"role": "tool", "tool_call_id": "c1", "content": "result", + "timestamp": time.time()}, ] result = _simulate_note_injection(history, "ping", resume_entry=None) assert "[System note:" in result assert "tool result" in result def test_stale_resume_pending_does_not_inject_restart_note(self): - """Old restart markers must not revive an unrelated stale task.""" + """Old restart markers must not revive an unrelated stale task. + + The transcript's last row is an hour old — well outside the 30-minute + window this test passes explicitly (``window_secs=1800``), which + exercises the stale path without tying the test to the production default. 
+ """ entry = self._pending_entry() entry.last_resume_marked_at = datetime.now() - timedelta(hours=1) + history = [ + {"role": "assistant", "content": "old in progress", + "timestamp": time.time() - 3600}, + ] result = _simulate_note_injection( - agent_history=[{"role": "assistant", "content": "old in progress"}], + history=history, user_message="start a new task", resume_entry=entry, + window_secs=1800, ) - assert result == "start a new task" def test_fresh_tool_tail_preserves_auto_continue_note(self): history = [ {"role": "assistant", "content": None, "tool_calls": [ {"id": "c1", "function": {"name": "x", "arguments": "{}"}}, - ]}, + ], "timestamp": time.time() - 1}, { "role": "tool", "tool_call_id": "c1", @@ -453,10 +514,15 @@ class TestResumePendingSystemNote: assert "tool result" in result def test_stale_tool_tail_does_not_inject_auto_continue_note(self): + """The core bug fix: stale tool-tail must not revive a dead task. + + Uses window_secs=1800 (30 min) to verify the gate fires at 1h — + keeps the test stable regardless of the production default. + """ history = [ {"role": "assistant", "content": None, "tool_calls": [ {"id": "c1", "function": {"name": "x", "arguments": "{}"}}, - ]}, + ], "timestamp": time.time() - 3601}, { "role": "tool", "tool_call_id": "c1", @@ -464,18 +530,210 @@ class TestResumePendingSystemNote: "timestamp": time.time() - 3600, }, ] - result = _simulate_note_injection(history, "start a new task", resume_entry=None) + result = _simulate_note_injection( + history, + "start a new task", + resume_entry=None, + window_secs=1800, + ) assert result == "start a new task" + def test_stale_tool_tail_with_production_data_shape(self): + """Regression guard for #16802: exercise the REAL production path + where ``agent_history`` has been stripped of timestamps. 
+ + The original PR #16802 fix read ``agent_history[-1].get("timestamp")`` + — which is always ``None`` at runtime because the gateway strips + ``timestamp`` off tool/tool_call rows in ``history → agent_history``. + This test builds a stale history, runs it through the real + ``_build_agent_history`` conversion, then asserts: + + 1. The stripped ``agent_history`` carries NO timestamp (protects + against someone "fixing" the original PR by re-adding the + stripped field — which would break the API contract). + 2. The freshness gate still correctly classifies the transcript + as stale because the signal is read from ``history`` BEFORE + the strip. + 3. No auto-continue note is injected. + """ + history = [ + {"role": "assistant", "content": None, "tool_calls": [ + {"id": "c1", "function": {"name": "x", "arguments": "{}"}}, + ], "timestamp": time.time() - 7201}, + { + "role": "tool", + "tool_call_id": "c1", + "content": "stale result", + "timestamp": time.time() - 7200, # 2 hours old + }, + ] + agent_history = _build_agent_history(history) + + # Invariant 1: strip contract preserved + assert agent_history[-1]["role"] == "tool" + assert "timestamp" not in agent_history[-1], ( + "agent_history tool rows must NOT carry a timestamp — the " + "freshness gate must read from raw history, not agent_history" + ) + + # Invariant 2+3: stale classification, no note injection + result = _simulate_note_injection( + history, + "start a new task", + resume_entry=None, + agent_history=agent_history, + ) + assert result == "start a new task" + + def test_freshness_gate_disabled_via_zero_window(self): + """window_secs=0 restores pre-fix behaviour (always inject).""" + history = [ + {"role": "assistant", "content": None, "tool_calls": [ + {"id": "c1", "function": {"name": "x", "arguments": "{}"}}, + ], "timestamp": time.time() - 86400}, + { + "role": "tool", + "tool_call_id": "c1", + "content": "day-old result", + "timestamp": time.time() - 86400, # 24 hours old + }, + ] + result = 
_simulate_note_injection( + history, "ping", resume_entry=None, window_secs=0, + ) + assert "[System note:" in result + assert "tool result" in result + + def test_legacy_history_without_timestamps_still_injects(self): + """Transcripts predating timestamp persistence must keep the old + behaviour — freshness unknown → treat as fresh.""" + history = [ + {"role": "assistant", "content": None, "tool_calls": [ + {"id": "c1", "function": {"name": "x", "arguments": "{}"}}, + ]}, + {"role": "tool", "tool_call_id": "c1", "content": "result"}, + ] + result = _simulate_note_injection(history, "ping", resume_entry=None) + assert "[System note:" in result + assert "tool result" in result + def test_no_note_when_nothing_to_resume(self): history = [ - {"role": "user", "content": "hello"}, - {"role": "assistant", "content": "hi"}, + {"role": "user", "content": "hello", "timestamp": time.time() - 2}, + {"role": "assistant", "content": "hi", "timestamp": time.time() - 1}, ] result = _simulate_note_injection(history, "ping", resume_entry=None) assert result == "ping" +# --------------------------------------------------------------------------- +# Freshness helpers +# --------------------------------------------------------------------------- + + +class TestFreshnessHelpers: + def test_coerce_datetime(self): + now = datetime.now() + assert _coerce_gateway_timestamp(now) == pytest.approx(now.timestamp(), abs=1e-3) + + def test_coerce_epoch_seconds(self): + assert _coerce_gateway_timestamp(1_700_000_000) == 1_700_000_000.0 + assert _coerce_gateway_timestamp(1_700_000_000.5) == 1_700_000_000.5 + + def test_coerce_epoch_milliseconds(self): + # Values > 10^10 treated as ms + assert _coerce_gateway_timestamp(1_700_000_000_000) == 1_700_000_000.0 + + def test_coerce_iso_string(self): + iso = "2026-04-18T12:00:00+00:00" + expected = datetime.fromisoformat(iso).timestamp() + assert _coerce_gateway_timestamp(iso) == pytest.approx(expected, abs=1e-3) + + def 
test_coerce_iso_string_with_z_suffix(self): + iso_z = "2026-04-18T12:00:00Z" + expected = datetime.fromisoformat("2026-04-18T12:00:00+00:00").timestamp() + assert _coerce_gateway_timestamp(iso_z) == pytest.approx(expected, abs=1e-3) + + def test_coerce_numeric_string(self): + assert _coerce_gateway_timestamp("1700000000") == 1_700_000_000.0 + + def test_coerce_rejects_garbage(self): + assert _coerce_gateway_timestamp(None) is None + assert _coerce_gateway_timestamp("") is None + assert _coerce_gateway_timestamp("not-a-timestamp") is None + assert _coerce_gateway_timestamp(True) is None # bool rejected + assert _coerce_gateway_timestamp(False) is None + assert _coerce_gateway_timestamp([1, 2, 3]) is None + + def test_is_fresh_unknown_is_fresh(self): + """Legacy-compat: unknown timestamp → fresh.""" + assert _is_fresh_gateway_interruption(None) is True + assert _is_fresh_gateway_interruption("not-a-timestamp") is True + + def test_is_fresh_window_bounds(self): + now = 1_700_000_000.0 + # 1h window, 30min old → fresh + assert _is_fresh_gateway_interruption( + now - 1800, now=now, window_secs=3600, + ) is True + # 1h window, 2h old → stale + assert _is_fresh_gateway_interruption( + now - 7200, now=now, window_secs=3600, + ) is False + # 1h window, exactly at boundary → fresh (<=) + assert _is_fresh_gateway_interruption( + now - 3600, now=now, window_secs=3600, + ) is True + + def test_is_fresh_zero_window_always_fresh(self): + """Opt-out: window_secs=0 disables the gate entirely.""" + assert _is_fresh_gateway_interruption( + 0.0, now=1_700_000_000.0, window_secs=0, + ) is True + assert _is_fresh_gateway_interruption( + -1.0, now=1_700_000_000.0, window_secs=-5, + ) is True + + def test_last_transcript_timestamp_skips_meta(self): + history = [ + {"role": "user", "content": "hi", "timestamp": 100.0}, + {"role": "assistant", "content": "hey", "timestamp": 200.0}, + {"role": "session_meta", "content": "tools:{}", "timestamp": 999.0}, + {"role": "system", "content": 
"ignore", "timestamp": 999.0}, + ] + assert _last_transcript_timestamp(history) == 200.0 + + def test_last_transcript_timestamp_empty(self): + assert _last_transcript_timestamp([]) is None + assert _last_transcript_timestamp(None) is None + + def test_last_transcript_timestamp_row_without_timestamp(self): + """Legacy transcript row (no timestamp) returns None → caller + treats as fresh.""" + history = [ + {"role": "user", "content": "hi"}, + {"role": "assistant", "content": "hey"}, + ] + assert _last_transcript_timestamp(history) is None + + def test_auto_continue_freshness_window_reads_env(self, monkeypatch): + monkeypatch.setenv("HERMES_AUTO_CONTINUE_FRESHNESS", "7200") + assert _auto_continue_freshness_window() == 7200.0 + + def test_auto_continue_freshness_window_default_when_unset(self, monkeypatch): + monkeypatch.delenv("HERMES_AUTO_CONTINUE_FRESHNESS", raising=False) + # Default is 1 hour + assert _auto_continue_freshness_window() == 3600.0 + + def test_auto_continue_freshness_window_malformed_falls_back(self, monkeypatch): + monkeypatch.setenv("HERMES_AUTO_CONTINUE_FRESHNESS", "not-a-number") + assert _auto_continue_freshness_window() == 3600.0 + + def test_auto_continue_freshness_window_empty_falls_back(self, monkeypatch): + monkeypatch.setenv("HERMES_AUTO_CONTINUE_FRESHNESS", "") + assert _auto_continue_freshness_window() == 3600.0 + + # --------------------------------------------------------------------------- # Drain-timeout path marks sessions resume_pending # ---------------------------------------------------------------------------