feat(hindsight): configurable embedded daemon health grace timeout (#50341)

On resource-contended hosts the embedded Hindsight daemon can exceed a
single 2s /health check; upstream then waits a grace window before
treating it as stale and killing+restarting it (hindsight-embed reads
HINDSIGHT_EMBED_PORT_HEALTH_GRACE_TIMEOUT, default 30s, into a
module-level constant at import time). Users on busy boxes had no
Hermes-side way to raise it short of hand-setting an env var.

Add a 'port_health_grace_timeout' config.json option to the Hindsight
plugin. When set, initialize() exports it to the process env BEFORE
daemon_embed_manager is imported (the import-time read is the contract).
The export uses setdefault(), so an explicit operator env override always
wins. The option is also exposed in 'hermes memory setup' for local_embedded mode.

Follow-up to #50308 / issue #13125 comment thread.
This commit is contained in:
Teknium 2026-06-21 12:20:53 -07:00 committed by GitHub
parent def3f6388f
commit 5e3e89cc05
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 106 additions and 0 deletions

View file

@ -17,6 +17,7 @@ Config via environment variables:
HINDSIGHT_MODE cloud or local (default: cloud)
HINDSIGHT_TIMEOUT API request timeout in seconds (default: 120)
HINDSIGHT_IDLE_TIMEOUT embedded daemon idle timeout seconds; 0 disables shutdown (default: 300)
HINDSIGHT_EMBED_PORT_HEALTH_GRACE_TIMEOUT seconds to wait for a slow embedded daemon /health before treating it as stale (default: 30; set via config.json port_health_grace_timeout)
HINDSIGHT_RETAIN_TAGS comma-separated tags attached to retained memories
HINDSIGHT_RETAIN_OBSERVATION_SCOPES observation scoping for retained memories: per_tag/combined/all_combinations, or a JSON list of tag-lists for custom scopes
HINDSIGHT_RETAIN_SOURCE metadata source value attached to retained memories
@ -86,6 +87,43 @@ def _parse_int_setting(value: Any, default: int) -> int:
return default
# Name of the env var the embedded daemon manager reads (at import time, as a
# module-level constant) to size the grace window it waits for a slow /health
# before declaring a daemon stale and killing it. Default upstream is 30s; on
# resource-contended hosts a busy daemon can exceed a single 2s health check
# and get needlessly killed + restarted (issue #13125 comment thread). We
# surface it as plugin config so users can raise it without hand-setting an
# env var, consistent with "config.json, not raw env vars".
_PORT_HEALTH_GRACE_ENV = "HINDSIGHT_EMBED_PORT_HEALTH_GRACE_TIMEOUT"
def _export_port_health_grace_timeout(config: dict[str, Any]) -> None:
    """Export the embedded-daemon health grace timeout to the process env.

    Must run BEFORE ``hindsight_embed.daemon_embed_manager`` is imported,
    because the package reads the env var into a module-level constant at
    import time. The value is only exported when the user configured one AND
    the env var isn't already set, so an explicit env override always wins.

    Args:
        config: Plugin config mapping. Only ``port_health_grace_timeout`` is
            read; a missing key, ``None`` or ``""`` means "not configured"
            and leaves the environment untouched.

    Values that cannot be converted to a float, NaN, and negative values are
    logged as warnings and ignored; nothing is raised.
    """
    import math  # function-scope: only needed for the NaN guard below

    raw = config.get("port_health_grace_timeout")
    if raw is None or raw == "":
        return

    try:
        seconds = float(raw)
    except (TypeError, ValueError, OverflowError):
        # OverflowError: float() of an int too large to represent.
        logger.warning(
            "Invalid Hindsight port_health_grace_timeout %r; ignoring.", raw
        )
        return

    # NaN compares False against everything, so it would slip past the
    # ``< 0`` check below and be exported as the literal string "nan".
    if math.isnan(seconds):
        logger.warning(
            "Invalid Hindsight port_health_grace_timeout %r; ignoring.", raw
        )
        return

    if seconds < 0:
        logger.warning(
            "Negative Hindsight port_health_grace_timeout %r; ignoring.", raw
        )
        return

    # setdefault: an explicit env var the operator set wins over config.
    os.environ.setdefault(_PORT_HEALTH_GRACE_ENV, repr(seconds))
def _check_local_runtime() -> tuple[bool, str | None]:
"""Return whether local embedded Hindsight imports cleanly.
@ -968,6 +1006,7 @@ class HindsightMemoryProvider(MemoryProvider):
{"key": "recall_prompt_preamble", "description": "Custom preamble for recalled memories in context"},
{"key": "timeout", "description": "API request timeout in seconds", "default": _DEFAULT_TIMEOUT},
{"key": "idle_timeout", "description": "Embedded daemon idle timeout in seconds (0 disables auto-shutdown)", "default": _DEFAULT_IDLE_TIMEOUT, "when": {"mode": "local_embedded"}},
{"key": "port_health_grace_timeout", "description": "Seconds to wait for a slow daemon /health before treating it as stale (raise on busy/low-resource hosts; blank uses the 30s default)", "default": "", "when": {"mode": "local_embedded"}},
]
def _get_client(self):
@ -1228,6 +1267,9 @@ class HindsightMemoryProvider(MemoryProvider):
if self._mode == "local":
self._mode = "local_embedded"
if self._mode == "local_embedded":
# Export the daemon health grace timeout BEFORE importing
# daemon_embed_manager (which reads it at import time).
_export_port_health_grace_timeout(self._config)
available, reason = _check_local_runtime()
if not available:
logger.warning(

View file

@ -0,0 +1,64 @@
"""Embedded-daemon health grace timeout export (issue #13125 comment thread).
On resource-contended hosts the embedded Hindsight daemon can exceed a single
2s /health check and get needlessly killed + restarted. Upstream exposes the
grace window via HINDSIGHT_EMBED_PORT_HEALTH_GRACE_TIMEOUT (read at import
time). The plugin surfaces it as a config.json knob and exports it to the
process env BEFORE daemon_embed_manager is imported.
"""
import importlib
import pytest
# Load the plugin module by dotted path and keep short aliases for the helper
# under test and the env var name it writes.
hindsight = importlib.import_module("plugins.memory.hindsight")
_export = hindsight._export_port_health_grace_timeout
_ENV = hindsight._PORT_HEALTH_GRACE_ENV
@pytest.fixture(autouse=True)
def _clear_env(monkeypatch):
    """Run each test with the grace-timeout env var unset and leave none behind.

    ``monkeypatch.delenv(..., raising=False)`` records nothing to undo when
    the variable is already absent, and the helper under test writes
    ``os.environ`` directly rather than through ``monkeypatch``. Without the
    explicit teardown below, a value exported by one test would survive into
    later tests (and other test modules in the same session).
    """
    import os

    monkeypatch.delenv(_ENV, raising=False)
    yield
    # Runs before monkeypatch's own undo, which then restores any value the
    # variable had before the test started.
    os.environ.pop(_ENV, None)
def test_configured_value_exported(monkeypatch):
    """A numeric config value ends up in the env as a float-parseable string."""
    import os

    config = {"port_health_grace_timeout": 60}
    _export(config)

    exported = os.environ[_ENV]
    assert float(exported) == 60.0
def test_string_value_parsed(monkeypatch):
    """A numeric string from config is parsed and exported."""
    import os

    _export(dict(port_health_grace_timeout="45"))

    parsed = float(os.environ[_ENV])
    assert parsed == 45.0
def test_blank_and_missing_are_noops(monkeypatch):
    """A missing key, an empty string, or None must not touch the env."""
    import os

    unconfigured = (
        {},
        {"port_health_grace_timeout": ""},
        {"port_health_grace_timeout": None},
    )
    for config in unconfigured:
        _export(config)
        assert _ENV not in os.environ
def test_invalid_and_negative_ignored(monkeypatch):
    """Unparseable and negative values are dropped without exporting."""
    import os

    for rejected in ("not-a-number", -5):
        _export({"port_health_grace_timeout": rejected})
        assert _ENV not in os.environ
def test_explicit_env_wins_over_config(monkeypatch):
    """An env var that is already set is not overwritten by the config value."""
    import os

    operator_value = "99"
    monkeypatch.setenv(_ENV, operator_value)

    _export({"port_health_grace_timeout": 60})

    # setdefault must not clobber an operator-set env override.
    assert os.environ[_ENV] == operator_value