mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-27 11:22:03 +00:00
fix(gateway): harden scale-to-zero dormancy guards (#52359)
Some checks are pending
CI / detect (push) Waiting to run
CI / tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / typecheck (push) Blocked by required conditions
CI / docs-site (push) Blocked by required conditions
CI / history-check (push) Blocked by required conditions
CI / contributor-check (push) Blocked by required conditions
CI / uv-lockfile (push) Blocked by required conditions
CI / docker-lint (push) Blocked by required conditions
CI / supply-chain (push) Blocked by required conditions
CI / osv-scanner (push) Blocked by required conditions
CI / All required checks pass (push) Blocked by required conditions
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Some checks are pending
CI / detect (push) Waiting to run
CI / tests (push) Blocked by required conditions
CI / lint (push) Blocked by required conditions
CI / typecheck (push) Blocked by required conditions
CI / docs-site (push) Blocked by required conditions
CI / history-check (push) Blocked by required conditions
CI / contributor-check (push) Blocked by required conditions
CI / uv-lockfile (push) Blocked by required conditions
CI / docker-lint (push) Blocked by required conditions
CI / supply-chain (push) Blocked by required conditions
CI / osv-scanner (push) Blocked by required conditions
CI / All required checks pass (push) Blocked by required conditions
Deploy Site / deploy-vercel (push) Waiting to run
Deploy Site / deploy-docs (push) Waiting to run
Docker Build and Publish / build-amd64 (push) Waiting to run
Docker Build and Publish / build-arm64 (push) Waiting to run
Docker Build and Publish / merge (push) Blocked by required conditions
Block scale-to-zero suspend while background async delegations are active, and restore runtime status to running on real inbound after a dormant wake. Add regression coverage for both review findings.
This commit is contained in:
parent
e62afaca62
commit
d6269da7fd
2 changed files with 56 additions and 1 deletions
|
|
@ -3595,6 +3595,13 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
"""
|
||||
if any(not t.done() for t in self._background_tasks):
|
||||
return True
|
||||
try:
|
||||
from tools.async_delegation import active_count
|
||||
|
||||
if active_count() > 0:
|
||||
return True
|
||||
except Exception: # noqa: BLE001 - never let the idle check raise
|
||||
logger.debug("scale-to-zero async-delegation check failed", exc_info=True)
|
||||
try:
|
||||
from tools.process_registry import process_registry
|
||||
|
||||
|
|
@ -3653,6 +3660,23 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
has_live_background_work=self._scale_to_zero_has_live_background_work(),
|
||||
)
|
||||
|
||||
def _scale_to_zero_note_real_inbound(self) -> None:
    """Record genuine inbound traffic and undo the dormant ``draining`` status.

    While quiescing the relay, the watcher writes runtime status `draining`.
    Dormancy is not the stop/restart drain path, though: the process stays
    alive, so once real traffic wakes it and re-enters the gateway it should
    present as running again. Internal completion/replay events intentionally
    do not call this helper, so they do not keep an otherwise idle gateway
    awake.
    """
    # Always refresh the inbound clock first, whether or not we were dormant.
    self._last_inbound_at = time.time()

    # A positive cooldown marker is the signal that a status restore is due;
    # the attribute may be absent, in which case there is nothing to restore.
    cooldown_until = getattr(self, "_scale_to_zero_cooldown_until", 0.0)
    if not cooldown_until > 0:
        return

    try:
        self._update_runtime_status("running")
    except Exception:  # noqa: BLE001 - status restoration is best-effort
        logger.debug("scale-to-zero: status restore failed", exc_info=True)

    # Clear the marker even when the status write failed, so the restore is
    # attempted once per wake rather than on every inbound message.
    self._scale_to_zero_cooldown_until = 0.0
|
||||
|
||||
def _relay_adapter_for_dormancy(self):
|
||||
"""Return the connected RELAY adapter, if any (the one go_dormant targets)."""
|
||||
try:
|
||||
|
|
@ -7504,7 +7528,7 @@ class GatewayRunner(GatewayAuthorizationMixin, GatewayKanbanWatchersMixin, Gatew
|
|||
# traffic — counting them would keep a genuinely idle gateway awake. This
|
||||
# clock is what the idle predicate (gateway/scale_to_zero.is_idle) reads.
|
||||
if not is_internal:
|
||||
self._last_inbound_at = time.time()
|
||||
self._scale_to_zero_note_real_inbound()
|
||||
|
||||
# Fire pre_gateway_dispatch plugin hook for user-originated messages.
|
||||
# Plugins receive the MessageEvent and may return a dict influencing flow:
|
||||
|
|
|
|||
|
|
@ -113,6 +113,37 @@ def test_bg_work_blocks_idle_via_background_tasks(monkeypatch):
|
|||
loop.close()
|
||||
|
||||
|
||||
def test_bg_work_blocks_idle_via_async_delegation(monkeypatch):
    """An active background delegation must block suspend.

    delegate_task(background=true) lives in tools.async_delegation, not the
    process registry, so the live-background-work check has to consult it too.
    """
    # Bare instance: skip __init__ and set only the state the check reads.
    runner = GatewayRunner.__new__(GatewayRunner)
    runner._background_tasks = set()

    # Pretend exactly one async delegation is still in flight.
    monkeypatch.setattr("tools.async_delegation.active_count", lambda: 1)

    has_live_work = runner._scale_to_zero_has_live_background_work()
    assert has_live_work is True
|
||||
|
||||
|
||||
def test_real_inbound_after_dormancy_restores_running_status(monkeypatch):
    """Real inbound after a dormant wake must restore the `running` status.

    Once a dormant gateway receives real inbound after wake, the runtime
    lifecycle must not remain stuck in the watcher-written `draining` state.
    The restore must also be one-shot: the cooldown marker is cleared, so a
    later inbound message does not rewrite the status again.
    """
    # Bare instance: skip __init__ and set only the state the helper touches.
    r = GatewayRunner.__new__(GatewayRunner)
    r._last_inbound_at = 0.0
    # A positive cooldown marker is what the helper gates the restore on.
    r._scale_to_zero_cooldown_until = time.time() + 60.0
    status_updates = []
    monkeypatch.setattr(
        r,
        "_update_runtime_status",
        lambda state=None, *a, **k: status_updates.append(state),
        raising=False,
    )

    r._scale_to_zero_note_real_inbound()

    assert r._last_inbound_at > 0.0
    assert status_updates == ["running"]
    # The marker must be cleared so the restore is not repeated.
    assert r._scale_to_zero_cooldown_until == 0.0

    # A second real inbound still stamps the clock but leaves status alone.
    first_stamp = r._last_inbound_at
    r._scale_to_zero_note_real_inbound()

    assert r._last_inbound_at >= first_stamp
    assert status_updates == ["running"]
|
||||
|
||||
|
||||
def test_bg_work_false_when_quiet():
|
||||
r = GatewayRunner.__new__(GatewayRunner)
|
||||
r._background_tasks = set()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue