mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
The gateway-side BEHAVIOUR layer that consumes the relay scale-to-zero primitives (gateway-gateway Phase 5): the gateway decides it is idle and drives the relay transport dormant so the platform (Fly autostop:"suspend") can suspend the now-traffic-idle machine, which wakes on the connector's wakeUrl poke (decisions.md Q3=C', D1-D13). - gateway/scale_to_zero.py: pure helpers — scale_to_zero_enabled (the NAS Labs HERMES_SCALE_TO_ZERO stamp, D11/Q8=A), parse_idle_timeout_seconds (config.yaml gateway.scale_to_zero.idle_timeout_minutes, D2), messaging_is_relay_only_or_absent (F6/D1), should_arm (D1/D11/§3.4(1)), is_idle (D2/D3/F7). - gateway/run.py: _last_inbound_at clock stamped on user inbound in _handle_message (F13); the arm-gate + idle predicate + the _scale_to_zero_watcher dormant sequence (mark draining -> adapter go_dormant() -> cooldown), started only when armed. Deliberately NOT the stop path and NOT mark_resume_pending (F12/D13). - tools/process_registry.py: has_any_active() for the bg-work guard (D3/F7). - hermes_cli/config.py: gateway.scale_to_zero.idle_timeout_minutes default 5. Tests: 38 pure-logic + 6 watcher (incl. bg-work regression guard proven RED). Full relay + scale-to-zero suites: 184 passed. The 20 unrelated failures in the broader run are PRE-EXISTING on origin/main (custom-provider/tools tests), confirmed via a pristine baseline worktree.
120 lines
4.5 KiB
Python
120 lines
4.5 KiB
Python
"""Watcher-level tests for scale-to-zero: the idle watcher's dormant sequence and
|
|
the arm-gate wiring, exercised against the real GatewayRunner methods bound onto
|
|
a lightweight stand-in (booting a full gateway is unnecessary for this logic and
|
|
would be slow/flaky).
|
|
|
|
These cover the parts gateway/test_scale_to_zero.py (pure helpers) can't: that
|
|
the watcher calls the relay adapter's go_dormant() exactly when idle+armed,
|
|
respects the cooldown, and skips when busy — the F7/D3 + D12 behaviour.
|
|
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import time
|
|
|
|
import pytest
|
|
|
|
from gateway.run import GatewayRunner
|
|
|
|
|
|
class _FakeRelayAdapter:
|
|
def __init__(self):
|
|
self.go_dormant_calls = 0
|
|
|
|
async def go_dormant(self):
|
|
self.go_dormant_calls += 1
|
|
return True
|
|
|
|
|
|
def _runner_with(monkeypatch, *, idle, armed_adapter=True):
    """Return ``(runner, adapter)`` for exercising the watcher without a boot.

    The runner is allocated via ``GatewayRunner.__new__`` so ``__init__`` never
    runs; only the attributes assigned here exist on it. Four collaborators are
    replaced with stubs through ``monkeypatch``: the idle predicate, the relay
    adapter lookup, the idle-timeout getter and the runtime-status update.

    Args:
        monkeypatch: pytest's ``monkeypatch`` fixture, used to install the stubs.
        idle: Value the stubbed ``_scale_to_zero_is_idle`` returns.
        armed_adapter: When false, the stubbed ``_relay_adapter_for_dormancy``
            returns ``None`` and the returned adapter is ``None`` as well.
    """
    runner = GatewayRunner.__new__(GatewayRunner)
    fake_adapter = _FakeRelayAdapter() if armed_adapter else None

    # Minimal instance state, set by hand because __init__ was bypassed.
    runner._running = True
    runner._scale_to_zero_cooldown_until = 0.0
    runner._last_inbound_at = time.time()
    runner._running_agents = {}
    runner._background_tasks = set()

    # raising=False because the bare instance may not carry these attributes.
    stubs = {
        "_scale_to_zero_is_idle": lambda: idle,
        "_relay_adapter_for_dormancy": lambda: fake_adapter,
        "_scale_to_zero_idle_timeout_seconds": lambda: 300.0,
        "_update_runtime_status": lambda *a, **k: None,
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(runner, attr_name, stub, raising=False)

    return runner, fake_adapter
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_watcher_goes_dormant_when_idle(monkeypatch):
    """An idle runner with a relay adapter is driven dormant and gets a cooldown."""
    runner, relay = _runner_with(monkeypatch, idle=True)

    # Let the watcher poll briefly at a short interval, then clear the run flag
    # and wait (bounded) for the watcher task to finish.
    watcher = asyncio.create_task(runner._scale_to_zero_watcher(interval=0.01))
    await asyncio.sleep(0.1)
    runner._running = False
    await asyncio.wait_for(watcher, timeout=2)

    # The adapter was asked to go dormant at least once during the run.
    assert relay.go_dormant_calls >= 1
    # After driving dormant, the re-arm cooldown deadline lies in the future.
    assert runner._scale_to_zero_cooldown_until > time.time()
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_watcher_does_not_go_dormant_when_busy(monkeypatch):
    """While the idle predicate reports busy, go_dormant() is never called."""
    runner, relay = _runner_with(monkeypatch, idle=False)

    # Same drive sequence as the idle case: poll briefly, then stop and join.
    watcher = asyncio.create_task(runner._scale_to_zero_watcher(interval=0.01))
    await asyncio.sleep(0.1)
    runner._running = False
    await asyncio.wait_for(watcher, timeout=2)

    assert relay.go_dormant_calls == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_watcher_respects_cooldown(monkeypatch):
    """An active cooldown suppresses dormancy even though the runner is idle."""
    runner, relay = _runner_with(monkeypatch, idle=True)

    # Push the cooldown deadline an hour out before the watcher starts.
    one_hour = 3600
    runner._scale_to_zero_cooldown_until = time.time() + one_hour

    watcher = asyncio.create_task(runner._scale_to_zero_watcher(interval=0.01))
    await asyncio.sleep(0.1)
    runner._running = False
    await asyncio.wait_for(watcher, timeout=2)

    assert relay.go_dormant_calls == 0
|
|
|
|
|
|
@pytest.mark.asyncio
async def test_watcher_noop_when_no_relay_adapter(monkeypatch):
    """The watcher must survive the adapter lookup returning None.

    Models the armed-but-no-relay-adapter case (e.g. relay not yet connected).
    """
    runner, _unused_adapter = _runner_with(
        monkeypatch, idle=True, armed_adapter=False
    )

    watcher = asyncio.create_task(runner._scale_to_zero_watcher(interval=0.01))
    await asyncio.sleep(0.1)
    runner._running = False
    # wait_for re-raises anything the watcher raised, so returning from this
    # await is the whole assertion: no exception and a clean loop exit.
    await asyncio.wait_for(watcher, timeout=2)
|
|
|
|
|
|
def test_bg_work_blocks_idle_via_background_tasks(monkeypatch):
    """_scale_to_zero_has_live_background_work() reports True when a tracked
    background task is still live (D3/F7) — the guard that keeps a gateway with
    an in-flight backgrounded subagent/terminal awake.

    A task that has not finished is placed in ``_background_tasks`` on a bare
    ``GatewayRunner`` (``__init__`` bypassed) and the guard is expected to
    return True. The task lives on a private event loop that is never run
    until cleanup, so it stays pending for the duration of the assertion.
    """
    r = GatewayRunner.__new__(GatewayRunner)

    async def _never():
        await asyncio.sleep(3600)

    loop = asyncio.new_event_loop()
    try:
        t = loop.create_task(_never())
        try:
            r._background_tasks = {t}
            # process_registry has nothing active in this fresh process.
            assert r._scale_to_zero_has_live_background_work() is True
        finally:
            # Cancel unconditionally. If the assertion above fails and the task
            # is left uncancelled, draining it below would block for the full
            # 3600 s sleep instead of surfacing the failure.
            t.cancel()
            # Drain the cancelled task so the loop closes without a pending
            # task; return_exceptions=True absorbs the CancelledError.
            loop.run_until_complete(asyncio.gather(t, return_exceptions=True))
    finally:
        # Close the loop even if task creation itself failed.
        loop.close()
|
|
|
|
|
|
def test_bg_work_false_when_quiet():
    """With no tracked background tasks the live-background-work guard is False."""
    runner = GatewayRunner.__new__(GatewayRunner)
    runner._background_tasks = set()

    # Relies on this fresh process having no active processes either.
    has_live_work = runner._scale_to_zero_has_live_background_work()
    assert has_live_work is False
|