diff --git a/plugins/memory/hindsight/__init__.py b/plugins/memory/hindsight/__init__.py
index 0f73ecedf67..9f5974b7b54 100644
--- a/plugins/memory/hindsight/__init__.py
+++ b/plugins/memory/hindsight/__init__.py
@@ -17,6 +17,7 @@ Config via environment variables:
   HINDSIGHT_MODE — cloud or local (default: cloud)
   HINDSIGHT_TIMEOUT — API request timeout in seconds (default: 120)
   HINDSIGHT_IDLE_TIMEOUT — embedded daemon idle timeout seconds; 0 disables shutdown (default: 300)
+  HINDSIGHT_EMBED_PORT_HEALTH_GRACE_TIMEOUT — seconds to wait for a slow embedded daemon /health before treating it as stale (default: 30; set via config.json port_health_grace_timeout)
   HINDSIGHT_RETAIN_TAGS — comma-separated tags attached to retained memories
   HINDSIGHT_RETAIN_OBSERVATION_SCOPES — observation scoping for retained memories: per_tag/combined/all_combinations, or a JSON list of tag-lists for custom scopes
   HINDSIGHT_RETAIN_SOURCE — metadata source value attached to retained memories
@@ -86,6 +87,58 @@ def _parse_int_setting(value: Any, default: int) -> int:
         return default
 
 
+# Env var the embedded daemon manager reads (at import time, as a module-level
+# constant) to size the grace window it waits for a slow /health before
+# declaring a daemon stale and killing it. Default upstream is 30s; on
+# resource-contended hosts a busy daemon can exceed a single 2s health check
+# and get needlessly killed + restarted (issue #13125 comment thread). We
+# surface it as plugin config so users can raise it without hand-setting an
+# env var, consistent with "config.json, not raw env vars".
+_PORT_HEALTH_GRACE_ENV = "HINDSIGHT_EMBED_PORT_HEALTH_GRACE_TIMEOUT"
+
+
+def _export_port_health_grace_timeout(config: dict[str, Any]) -> None:
+    """Export the embedded-daemon health grace timeout to the process env.
+
+    Must run BEFORE ``hindsight_embed.daemon_embed_manager`` is imported,
+    because the package reads the env var into a module-level constant at
+    import time. We only set it when the user configured a value AND the
+    env var isn't already set, so an explicit env override always wins.
+
+    Only ``config["port_health_grace_timeout"]`` is read. ``None`` and ``""``
+    mean "not configured". Values that are not numeric, not finite
+    (``nan``/``inf``) or negative are logged as warnings and ignored.
+    """
+    import math  # function-scope so the helper needs no new file-level import
+
+    raw = config.get("port_health_grace_timeout")
+    if raw is None or raw == "":
+        return
+    try:
+        seconds = float(raw)
+    except (TypeError, ValueError, OverflowError):
+        # OverflowError: an int too large to be converted to a float.
+        logger.warning(
+            "Invalid Hindsight port_health_grace_timeout %r; ignoring.", raw
+        )
+        return
+    if not math.isfinite(seconds):
+        # float() happily parses "nan"/"inf"; "nan" would also slip past the
+        # ``< 0`` check below because every comparison with NaN is False.
+        logger.warning(
+            "Non-finite Hindsight port_health_grace_timeout %r; ignoring.",
+            raw,
+        )
+        return
+    if seconds < 0:
+        logger.warning(
+            "Negative Hindsight port_health_grace_timeout %r; ignoring.", raw
+        )
+        return
+    # setdefault: an explicit env var the operator set wins over config.
+    os.environ.setdefault(_PORT_HEALTH_GRACE_ENV, repr(seconds))
+
+
 def _check_local_runtime() -> tuple[bool, str | None]:
     """Return whether local embedded Hindsight imports cleanly.
 
@@ -968,6 +1021,7 @@ class HindsightMemoryProvider(MemoryProvider):
             {"key": "recall_prompt_preamble", "description": "Custom preamble for recalled memories in context"},
             {"key": "timeout", "description": "API request timeout in seconds", "default": _DEFAULT_TIMEOUT},
             {"key": "idle_timeout", "description": "Embedded daemon idle timeout in seconds (0 disables auto-shutdown)", "default": _DEFAULT_IDLE_TIMEOUT, "when": {"mode": "local_embedded"}},
+            {"key": "port_health_grace_timeout", "description": "Seconds to wait for a slow daemon /health before treating it as stale (raise on busy/low-resource hosts; blank uses the 30s default)", "default": "", "when": {"mode": "local_embedded"}},
         ]
 
     def _get_client(self):
@@ -1228,6 +1282,9 @@ class HindsightMemoryProvider(MemoryProvider):
         if self._mode == "local":
             self._mode = "local_embedded"
         if self._mode == "local_embedded":
+            # Export the daemon health grace timeout BEFORE importing
+            # daemon_embed_manager (which reads it at import time).
+            _export_port_health_grace_timeout(self._config)
             available, reason = _check_local_runtime()
             if not available:
                 logger.warning(
diff --git a/tests/plugins/test_hindsight_health_grace_timeout.py b/tests/plugins/test_hindsight_health_grace_timeout.py
new file mode 100644
index 00000000000..666f8a48c0f
--- /dev/null
+++ b/tests/plugins/test_hindsight_health_grace_timeout.py
@@ -0,0 +1,62 @@
+"""Embedded-daemon health grace timeout export (issue #13125 comment thread).
+
+On resource-contended hosts the embedded Hindsight daemon can exceed a single
+2s /health check and get needlessly killed + restarted. Upstream exposes the
+grace window via HINDSIGHT_EMBED_PORT_HEALTH_GRACE_TIMEOUT (read at import
+time). The plugin surfaces it as a config.json knob and exports it to the
+process env BEFORE daemon_embed_manager is imported.
+"""
+
+import importlib
+import os
+
+import pytest
+
+hindsight = importlib.import_module("plugins.memory.hindsight")
+_export = hindsight._export_port_health_grace_timeout
+_ENV = hindsight._PORT_HEALTH_GRACE_ENV
+
+
+@pytest.fixture(autouse=True)
+def _clear_env(monkeypatch):
+    """Run every test with the env var unset and undo any export afterwards.
+
+    ``_export`` writes to ``os.environ`` directly. ``delenv(raising=False)`` on
+    an absent key records nothing for monkeypatch to undo, so the exported
+    value would outlive the test. Setting the key first makes monkeypatch
+    remember the original state and restore it on teardown.
+    """
+    monkeypatch.setenv(_ENV, "")
+    monkeypatch.delenv(_ENV)
+
+
+def test_configured_value_exported():
+    _export({"port_health_grace_timeout": 60})
+    assert float(os.environ[_ENV]) == 60.0
+
+
+def test_string_value_parsed():
+    _export({"port_health_grace_timeout": "45"})
+    assert float(os.environ[_ENV]) == 45.0
+
+
+@pytest.mark.parametrize(
+    "config",
+    [{}, {"port_health_grace_timeout": ""}, {"port_health_grace_timeout": None}],
+)
+def test_blank_and_missing_are_noops(config):
+    _export(config)
+    assert _ENV not in os.environ
+
+
+@pytest.mark.parametrize("value", ["not-a-number", -5])
+def test_invalid_and_negative_ignored(value):
+    _export({"port_health_grace_timeout": value})
+    assert _ENV not in os.environ
+
+
+def test_explicit_env_wins_over_config(monkeypatch):
+    monkeypatch.setenv(_ENV, "99")
+    _export({"port_health_grace_timeout": 60})
+    # setdefault must not clobber an operator-set env override.
+    assert os.environ[_ENV] == "99"