mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
The gateway-side BEHAVIOUR layer that consumes the relay scale-to-zero primitives (gateway-gateway Phase 5): the gateway decides it is idle and drives the relay transport dormant so the platform (Fly autostop:"suspend") can suspend the now-traffic-idle machine, which wakes on the connector's wakeUrl poke (decisions.md Q3=C', D1-D13). - gateway/scale_to_zero.py: pure helpers — scale_to_zero_enabled (the NAS Labs HERMES_SCALE_TO_ZERO stamp, D11/Q8=A), parse_idle_timeout_seconds (config.yaml gateway.scale_to_zero.idle_timeout_minutes, D2), messaging_is_relay_only_or_absent (F6/D1), should_arm (D1/D11/§3.4(1)), is_idle (D2/D3/F7). - gateway/run.py: _last_inbound_at clock stamped on user inbound in _handle_message (F13); the arm-gate + idle predicate + the _scale_to_zero_watcher dormant sequence (mark draining -> adapter go_dormant() -> cooldown), started only when armed. Deliberately NOT the stop path and NOT mark_resume_pending (F12/D13). - tools/process_registry.py: has_any_active() for the bg-work guard (D3/F7). - hermes_cli/config.py: gateway.scale_to_zero.idle_timeout_minutes default 5. Tests: 38 pure-logic + 6 watcher (incl. bg-work regression guard proven RED). Full relay + scale-to-zero suites: 184 passed. The 20 unrelated failures in the broader run are PRE-EXISTING on origin/main (custom-provider/tools tests), confirmed via a pristine baseline worktree.
124 lines
5 KiB
Python
124 lines
5 KiB
Python
"""Scale-to-zero idle detection + dormant-quiesce for the gateway (Phase 0).
|
||
|
||
This is the gateway-side BEHAVIOUR layer that consumes the relay scale-to-zero
|
||
PRIMITIVES (gateway-gateway Phase 5: the buffered-flip, the durable per-instance
|
||
buffer, the wakeUrl poke, the reconnect supervisor). It owns the *decision* to go
|
||
idle and drives the relay transport's ``go_dormant()`` (D12) — it does NOT itself
|
||
suspend the machine. On Fly, the now-traffic-idle machine is suspended by
|
||
``autostop:"suspend"`` and woken by autostart-on-wakeUrl (decisions.md Q3=C′).
|
||
|
||
Design constraints (decisions.md):
|
||
- Per-instance enable is gated SOLELY by the NAS "Labs" toggle, carried to the
|
||
gateway as the ``HERMES_SCALE_TO_ZERO`` env stamp (D11/Q8=A). NOT a user
|
||
config key; ``scale_to_zero.idle_timeout_minutes`` IS config.yaml (D2).
|
||
- Arm only when messaging is relay-only or absent (D1/F6) AND a wakeUrl is
|
||
registered (§3.4(1)) AND the flag is set.
|
||
- Idle = no in-flight agent turn AND no inbound for N min AND no live
|
||
background work (D2/D3/F7).
|
||
- The quiesce uses ``go_dormant()`` (socket closed + supervisor preserved),
|
||
NEVER the stop/restart drain or ``disconnect()`` (F12/F14). The process stays
|
||
alive; Fly freezes+resumes it.
|
||
- ``mark_resume_pending`` is deliberately NOT called here (D13 — suspend
|
||
preserves RAM; revive only if we move to autostop:"stop" or see kills).
|
||
|
||
The pure helpers (``parse_idle_timeout_seconds``, ``scale_to_zero_enabled``,
|
||
``messaging_is_relay_only_or_absent``, ``is_idle``, ``should_arm``) take plain
|
||
inputs so they unit-test without a live gateway.
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import os
|
||
from typing import Any, Iterable, Optional
|
||
|
||
# Name of the environment variable NAS stamps when the scaleToZero Labs toggle
# is on (D11/Q8=A), in the same way the `relay` feature stamps
# GATEWAY_RELAY_URL. Only truthy values count as "on".
SCALE_TO_ZERO_ENV = "HERMES_SCALE_TO_ZERO"

# Default idle timeout in minutes for the config.yaml setting (D2). This is a
# behavioural knob, so it belongs in config rather than in the environment.
DEFAULT_IDLE_TIMEOUT_MINUTES = 5

# Flag spellings accepted as "on"; values are compared after stripping
# whitespace and lower-casing.
_TRUTHY = {"1", "true", "yes", "on"}
|
||
|
||
|
||
def scale_to_zero_enabled(environ: Optional[dict] = None) -> bool:
    """Report whether the HERMES_SCALE_TO_ZERO stamp opts this instance in.

    Per D11/Q8=A the env flag is the SOLE per-instance enable signal that
    reaches the gateway (it carries the Labs toggle). The check fails safe: a
    missing, blank, or unrecognised value reads as disabled.

    ``environ`` is the mapping to read the flag from; when it is ``None`` the
    process environment (``os.environ``) is consulted instead.
    """
    source = os.environ if environ is None else environ
    raw_flag = source.get(SCALE_TO_ZERO_ENV, "")
    # Normalise before matching so " TRUE " and "true" are treated alike.
    normalised = str(raw_flag).strip().lower()
    return normalised in _TRUTHY
|
||
|
||
|
||
def parse_idle_timeout_seconds(
    cfg_value: Any, default_minutes: int = DEFAULT_IDLE_TIMEOUT_MINUTES
) -> float:
    """Coerce ``scale_to_zero.idle_timeout_minutes`` (config.yaml, D2) to seconds.

    Degrades to ``default_minutes`` whenever ``cfg_value`` is not a finite,
    positive number of minutes:

    - non-numeric input (``None``, unparsable strings, containers, ...);
    - booleans (``True`` would otherwise silently become a 1-minute timeout);
    - NaN or +/-infinity, including the strings ``"nan"`` / ``"inf"`` that
      ``float()`` accepts;
    - integers too large to convert to a float (``OverflowError``);
    - values so large that the result in seconds would overflow to infinity;
    - zero or negative values.

    Never raises for a bad ``cfg_value`` and never returns a value <= 0 or a
    non-finite one. A zero/negative timeout would make the gateway go dormant
    instantly, and a NaN timeout makes every ``>=`` comparison false, so the
    gateway would never be considered idle; neither is ever the intent.

    ``default_minutes`` is only converted when the fallback is actually needed.
    """
    try:
        # bool is an int subclass, so float(True) == 1.0 would be accepted;
        # a boolean is not a meaningful timeout, so treat it as invalid.
        minutes = None if isinstance(cfg_value, bool) else float(cfg_value)
    except (TypeError, ValueError, OverflowError):
        minutes = None

    # Checking the product also rejects finite values whose conversion to
    # seconds would overflow to infinity; NaN fails isfinite() as well.
    if minutes is None or minutes <= 0 or not math.isfinite(minutes * 60.0):
        minutes = float(default_minutes)
    return minutes * 60.0
|
||
|
||
|
||
def messaging_is_relay_only_or_absent(platforms: Iterable[Any]) -> bool:
    """Check the F6/D1 structural precondition for scale-to-zero.

    Returns True iff RELAY is the only connected messaging platform, or no
    platform is connected at all (a Chronos-only / no-platform agent).

    A directly-connected platform (Discord/Telegram/Slack/...) holds a live
    socket and cannot scale to zero, so its presence disarms the feature.
    Platforms are compared by their ``.value``/name rather than against the
    enum itself, which keeps this module import-light and unit-testable.
    """
    # Whatever remains after removing "relay" is a directly-connected platform.
    non_relay = {_platform_name(entry) for entry in platforms} - {"relay"}
    return not non_relay


def _platform_name(platform: Any) -> str:
    """Normalise a platform to a stripped, lower-case name.

    Uses the object's ``value`` attribute when it has one (e.g. an enum
    member); otherwise the object itself is stringified.
    """
    try:
        raw = platform.value
    except AttributeError:
        raw = platform
    return str(raw).strip().lower()
|
||
|
||
|
||
def should_arm(
    *,
    enabled: bool,
    relay_only_or_absent: bool,
    wake_url: Optional[str],
) -> bool:
    """Whether to start the idle watcher at all (D1/D11/§3.4(1)).

    ALL must hold: the Labs flag is on (``enabled``), messaging is
    relay-only/absent (``relay_only_or_absent``), and a wakeUrl is registered
    (``wake_url``). A suspended instance with no reachable wake target is a
    black hole (§3.4(1)), so a ``None``, empty, or whitespace-only ``wake_url``
    counts as "not registered".

    Any unmet condition -> the watcher never starts (no idle timer, no
    dormancy), so a non-opted instance behaves exactly as today.
    """
    if not (enabled and relay_only_or_absent):
        return False
    # A whitespace-only URL is truthy but gives nothing to poke, so strip
    # before testing rather than relying on bare truthiness.
    return bool(wake_url) and bool(str(wake_url).strip())
|
||
|
||
|
||
def is_idle(
    *,
    running_agent_count: int,
    seconds_since_last_inbound: float,
    idle_timeout_seconds: float,
    has_live_background_work: bool,
) -> bool:
    """Evaluate the idle predicate (D2/D3/F7); pure, no side effects.

    The gateway counts as idle only when all three hold:

    - no agent turn is in flight (``running_agent_count`` is not above zero);
    - no live background work exists (backgrounded delegate_task / kanban /
      bg terminal);
    - the last inbound is at least ``idle_timeout_seconds`` old.

    Any active work keeps the gateway awake, because suspending mid-flight
    would lose it. The inbound-age comparison is only evaluated once both
    work checks have passed.
    """
    work_in_progress = running_agent_count > 0 or has_live_background_work
    return (
        False
        if work_in_progress
        else seconds_since_last_inbound >= idle_timeout_seconds
    )
|