fix(clarify): raise default clarify_timeout to 3600s (#32762)

The 600s default evicted the gateway clarify entry while users were
still away (meeting/AFK); a later button tap then landed on a dead
entry and the agent hung on 'running: clarify'. Raise the default to
1h in DEFAULT_CONFIG and the get_clarify_timeout() code-level fallback,
documenting the running-agent-guard tradeoff. User overrides still win.
This commit is contained in:
teknium1 2026-06-28 00:45:00 -07:00 committed by Teknium
parent 3f543229f2
commit aacc15b2c9
3 changed files with 17 additions and 8 deletions

View file

@@ -1012,7 +1012,13 @@ DEFAULT_CONFIG = {
# unblocks with "[user did not respond within Xm]" so it can adapt
# rather than pinning the running-agent guard forever. CLI clarify
# blocks indefinitely (input() is synchronous) and ignores this.
"clarify_timeout": 600,
# Default 3600 (1h): real users step away (meetings, AFK) and the
# old 600s default evicted the entry mid-think, so a later button
# tap landed on a dead entry (#32762). Tradeoff: a higher value
# holds the gateway's running-agent guard longer for a genuinely
# abandoned prompt — lower it if a single session must free up the
# guard sooner.
"clarify_timeout": 3600,
# Periodic "still working" notification interval (seconds).
# Sends a status message every N seconds so the user knows the
# agent hasn't died during long tasks. 0 = disable notifications.

View file

@@ -167,11 +167,11 @@ class TestClarifyPrimitive:
assert b is not None and b.clarify_id == "idB"
def test_clarify_timeout_config_default(self):
"""get_clarify_timeout returns 600 by default."""
"""get_clarify_timeout returns a positive int (default 3600)."""
from tools import clarify_gateway as cm
timeout = cm.get_clarify_timeout()
# Default 600s OR whatever is in the user's loaded config.
# Default 3600s OR whatever is in the user's loaded config.
# Floor check: must be a positive int, not crashed.
assert isinstance(timeout, int)
assert timeout > 0

View file

@@ -231,10 +231,13 @@ def clear_session(session_key: str) -> int:
def get_clarify_timeout() -> int:
"""Read the clarify response timeout (seconds) from config.
Defaults to 600 (10 minutes) — long enough for the user to type a
thoughtful response, short enough that an abandoned prompt eventually
Defaults to 3600 (1 hour) — long enough that a user who steps away
(meeting, AFK, slow to read) still finds a live entry when they tap
the button, short enough that a genuinely abandoned prompt eventually
unblocks the agent thread instead of pinning the running-agent guard
forever.
forever. The old 600s default evicted the entry mid-think, so a late
tap landed on a dead entry and the agent hung on ``running: clarify``
(#32762).
Reads ``agent.clarify_timeout`` from config.yaml.
"""
@@ -242,9 +245,9 @@ def get_clarify_timeout() -> int:
from hermes_cli.config import load_config
cfg = load_config() or {}
agent_cfg = cfg.get("agent", {}) or {}
return int(agent_cfg.get("clarify_timeout", 600))
return int(agent_cfg.get("clarify_timeout", 3600))
except Exception:
return 600
return 3600
# =========================================================================