mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
* fix(plugins): add thread-safe lazy-singleton helpers, fix honcho TOCTOU (#24759) get_honcho_client() and fal's _load_fal_client() used unlocked check-then-init: racing threads both ran the expensive build and the loser's client (open connection) leaked. Rather than one-off locks, add plugins/plugin_utils.py with two reusable primitives every plugin author can drop in: - lazy_singleton: decorator for zero-arg accessors - SingletonSlot: manual slot for config-keyed accessors (first wins) Both use double-checked locking; factory runs at most once; failed builds aren't cached. honcho is the reference consumer; fal's sibling TOCTOU gets a matching double-checked lock. Plugin dev guide documents the pattern so future plugins don't reintroduce the race. Closes #24759 * test(honcho): update reset test for SingletonSlot internals test_reset_clears_singleton poked the removed _honcho_client module global directly. Assert through the slot's public peek() surface instead, matching the #24759 refactor.
This commit is contained in:
parent
74239b4942
commit
47d5177a7d
7 changed files with 576 additions and 105 deletions
|
|
@ -22,6 +22,7 @@ from pathlib import Path
|
|||
|
||||
from hermes_constants import get_hermes_home
|
||||
from hermes_cli.profiles import _get_default_hermes_home
|
||||
from plugins.plugin_utils import SingletonSlot
|
||||
from typing import Any, TYPE_CHECKING
|
||||
|
||||
if TYPE_CHECKING:
|
||||
|
|
@ -737,7 +738,7 @@ class HonchoClientConfig:
|
|||
return self.workspace_id
|
||||
|
||||
|
||||
_honcho_client: Honcho | None = None
|
||||
_honcho_client_slot: SingletonSlot = SingletonSlot()
|
||||
|
||||
|
||||
def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
||||
|
|
@ -745,11 +746,14 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
|
||||
When no config is provided, attempts to load ~/.honcho/config.json
|
||||
first, falling back to environment variables.
|
||||
"""
|
||||
global _honcho_client
|
||||
|
||||
if _honcho_client is not None:
|
||||
return _honcho_client
|
||||
Thread-safe: the client is built exactly once even under concurrent
|
||||
first calls (double-checked locking via ``SingletonSlot``), so racing
|
||||
threads can't each construct a client and leak the loser's connection.
|
||||
"""
|
||||
cached = _honcho_client_slot.peek()
|
||||
if cached is not None:
|
||||
return cached
|
||||
|
||||
if config is None:
|
||||
config = HonchoClientConfig.from_global_config()
|
||||
|
|
@ -762,111 +766,116 @@ def get_honcho_client(config: HonchoClientConfig | None = None) -> Honcho:
|
|||
"For local instances, set HONCHO_BASE_URL instead."
|
||||
)
|
||||
|
||||
# Lazy-install the honcho SDK on demand. ensure() honors
|
||||
# security.allow_lazy_installs (default true). On failure we surface
|
||||
# the original ImportError-shape message so existing callers still get
|
||||
# the "go run hermes honcho setup" hint they used to.
|
||||
try:
|
||||
from tools.lazy_deps import FeatureUnavailable, ensure as _lazy_ensure
|
||||
_lazy_ensure("memory.honcho", prompt=False)
|
||||
except ImportError:
|
||||
# lazy_deps module missing — fall through to the raw import below.
|
||||
pass
|
||||
except Exception:
|
||||
# FeatureUnavailable or unexpected error. Don't crash here; let the
|
||||
# actual import attempt produce the canonical error message.
|
||||
pass
|
||||
|
||||
try:
|
||||
from honcho import Honcho
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"honcho-ai is required for Honcho integration. "
|
||||
"Install it with: pip install honcho-ai "
|
||||
"(or run `hermes honcho setup` to configure)."
|
||||
)
|
||||
|
||||
# Allow config.yaml honcho.base_url to override the SDK's environment
|
||||
# mapping, enabling remote self-hosted Honcho deployments without
|
||||
# requiring the server to live on localhost.
|
||||
resolved_base_url = config.base_url
|
||||
resolved_timeout = config.timeout
|
||||
if not resolved_base_url or resolved_timeout is None:
|
||||
# Everything below is the expensive part the issue flags: lazy SDK
|
||||
# install, config resolution, and client construction. Run it inside the
|
||||
# slot's factory so it executes exactly once even when several threads
|
||||
# race the first call — the slot's double-checked lock serializes them and
|
||||
# the losers get the winner's client instead of building their own.
|
||||
def _build() -> "Honcho":
|
||||
# Lazy-install the honcho SDK on demand. ensure() honors
|
||||
# security.allow_lazy_installs (default true). On failure we surface
|
||||
# the original ImportError-shape message so existing callers still get
|
||||
# the "go run hermes honcho setup" hint they used to.
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
hermes_cfg = load_config()
|
||||
honcho_cfg = hermes_cfg.get("honcho", {})
|
||||
if isinstance(honcho_cfg, dict):
|
||||
if not resolved_base_url:
|
||||
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
|
||||
if resolved_timeout is None:
|
||||
resolved_timeout = _resolve_optional_float(
|
||||
honcho_cfg.get("timeout"),
|
||||
honcho_cfg.get("request_timeout"),
|
||||
)
|
||||
from tools.lazy_deps import FeatureUnavailable, ensure as _lazy_ensure
|
||||
_lazy_ensure("memory.honcho", prompt=False)
|
||||
except ImportError:
|
||||
# lazy_deps module missing — fall through to the raw import below.
|
||||
pass
|
||||
except Exception:
|
||||
# FeatureUnavailable or unexpected error. Don't crash here; let the
|
||||
# actual import attempt produce the canonical error message.
|
||||
pass
|
||||
|
||||
# Fall back to the default so an unconfigured install cannot hang
|
||||
# indefinitely on a stalled Honcho request.
|
||||
if resolved_timeout is None:
|
||||
resolved_timeout = _DEFAULT_HTTP_TIMEOUT
|
||||
try:
|
||||
from honcho import Honcho
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"honcho-ai is required for Honcho integration. "
|
||||
"Install it with: pip install honcho-ai "
|
||||
"(or run `hermes honcho setup` to configure)."
|
||||
)
|
||||
|
||||
if resolved_base_url:
|
||||
logger.info("Initializing Honcho client (base_url: %s, workspace: %s)", resolved_base_url, config.workspace_id)
|
||||
else:
|
||||
logger.info("Initializing Honcho client (host: %s, workspace: %s)", config.host, config.workspace_id)
|
||||
# Allow config.yaml honcho.base_url to override the SDK's environment
|
||||
# mapping, enabling remote self-hosted Honcho deployments without
|
||||
# requiring the server to live on localhost.
|
||||
resolved_base_url = config.base_url
|
||||
resolved_timeout = config.timeout
|
||||
if not resolved_base_url or resolved_timeout is None:
|
||||
try:
|
||||
from hermes_cli.config import load_config
|
||||
hermes_cfg = load_config()
|
||||
honcho_cfg = hermes_cfg.get("honcho", {})
|
||||
if isinstance(honcho_cfg, dict):
|
||||
if not resolved_base_url:
|
||||
resolved_base_url = honcho_cfg.get("base_url", "").strip() or None
|
||||
if resolved_timeout is None:
|
||||
resolved_timeout = _resolve_optional_float(
|
||||
honcho_cfg.get("timeout"),
|
||||
honcho_cfg.get("request_timeout"),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Local Honcho instances don't require an API key, but the SDK
|
||||
# expects a non-empty string. Use a placeholder for local URLs.
|
||||
# For local: only use config.api_key if the host block explicitly
|
||||
# sets apiKey (meaning the user wants local auth). Otherwise skip
|
||||
# the stored key -- it's likely a cloud key that would break local.
|
||||
_is_local = resolved_base_url and (
|
||||
"localhost" in resolved_base_url
|
||||
or "127.0.0.1" in resolved_base_url
|
||||
or "::1" in resolved_base_url
|
||||
)
|
||||
if _is_local:
|
||||
# Check if the host block has its own apiKey (explicit local auth).
|
||||
# Auth-skipping is loopback-only: a stored key is likely a cloud key
|
||||
# that would break a no-auth local server, so we substitute the SDK's
|
||||
# required-non-empty placeholder unless the host block opts in.
|
||||
_raw = config.raw or {}
|
||||
_host_block = (_raw.get("hosts") or {}).get(config.host, {})
|
||||
_host_has_key = bool(_host_block.get("apiKey"))
|
||||
effective_api_key = config.api_key if _host_has_key else "local"
|
||||
else:
|
||||
effective_api_key = config.api_key
|
||||
# Fall back to the default so an unconfigured install cannot hang
|
||||
# indefinitely on a stalled Honcho request.
|
||||
if resolved_timeout is None:
|
||||
resolved_timeout = _DEFAULT_HTTP_TIMEOUT
|
||||
|
||||
# The Honcho SDK's route builders (e.g. routes.workspaces()) already
|
||||
# include the version prefix (e.g. "/v3/workspaces"). When a user-supplied
|
||||
# base_url already ends in a version segment (e.g.
|
||||
# "http://localhost:38000/v3", "https://honcho.my.ts.net/v3"), concatenating
|
||||
# the two produces "/v3/v3/workspaces" → 404 on every call. This is a pure
|
||||
# routing concern independent of host, so strip a trailing version segment
|
||||
# from ANY base_url — loopback, LAN, custom domain, or cloud alike. The
|
||||
# SDK then appends its own versioned paths correctly.
|
||||
if resolved_base_url:
|
||||
import re as _re
|
||||
resolved_base_url = _re.sub(r"/v\d+/*$", "", resolved_base_url).rstrip("/")
|
||||
if resolved_base_url:
|
||||
logger.info("Initializing Honcho client (base_url: %s, workspace: %s)", resolved_base_url, config.workspace_id)
|
||||
else:
|
||||
logger.info("Initializing Honcho client (host: %s, workspace: %s)", config.host, config.workspace_id)
|
||||
|
||||
kwargs: dict = {
|
||||
"workspace_id": config.workspace_id,
|
||||
"api_key": effective_api_key,
|
||||
"environment": config.environment,
|
||||
}
|
||||
if resolved_base_url:
|
||||
kwargs["base_url"] = resolved_base_url
|
||||
if resolved_timeout is not None:
|
||||
kwargs["timeout"] = resolved_timeout
|
||||
# Local Honcho instances don't require an API key, but the SDK
|
||||
# expects a non-empty string. Use a placeholder for local URLs.
|
||||
# For local: only use config.api_key if the host block explicitly
|
||||
# sets apiKey (meaning the user wants local auth). Otherwise skip
|
||||
# the stored key -- it's likely a cloud key that would break local.
|
||||
_is_local = resolved_base_url and (
|
||||
"localhost" in resolved_base_url
|
||||
or "127.0.0.1" in resolved_base_url
|
||||
or "::1" in resolved_base_url
|
||||
)
|
||||
if _is_local:
|
||||
# Check if the host block has its own apiKey (explicit local auth).
|
||||
# Auth-skipping is loopback-only: a stored key is likely a cloud key
|
||||
# that would break a no-auth local server, so we substitute the SDK's
|
||||
# required-non-empty placeholder unless the host block opts in.
|
||||
_raw = config.raw or {}
|
||||
_host_block = (_raw.get("hosts") or {}).get(config.host, {})
|
||||
_host_has_key = bool(_host_block.get("apiKey"))
|
||||
effective_api_key = config.api_key if _host_has_key else "local"
|
||||
else:
|
||||
effective_api_key = config.api_key
|
||||
|
||||
_honcho_client = Honcho(**kwargs)
|
||||
# The Honcho SDK's route builders (e.g. routes.workspaces()) already
|
||||
# include the version prefix (e.g. "/v3/workspaces"). When a user-supplied
|
||||
# base_url already ends in a version segment (e.g.
|
||||
# "http://localhost:38000/v3", "https://honcho.my.ts.net/v3"), concatenating
|
||||
# the two produces "/v3/v3/workspaces" → 404 on every call. This is a pure
|
||||
# routing concern independent of host, so strip a trailing version segment
|
||||
# from ANY base_url — loopback, LAN, custom domain, or cloud alike. The
|
||||
# SDK then appends its own versioned paths correctly.
|
||||
if resolved_base_url:
|
||||
import re as _re
|
||||
resolved_base_url = _re.sub(r"/v\d+/*$", "", resolved_base_url).rstrip("/")
|
||||
|
||||
return _honcho_client
|
||||
kwargs: dict = {
|
||||
"workspace_id": config.workspace_id,
|
||||
"api_key": effective_api_key,
|
||||
"environment": config.environment,
|
||||
}
|
||||
if resolved_base_url:
|
||||
kwargs["base_url"] = resolved_base_url
|
||||
if resolved_timeout is not None:
|
||||
kwargs["timeout"] = resolved_timeout
|
||||
|
||||
return Honcho(**kwargs)
|
||||
|
||||
return _honcho_client_slot.get(_build)
|
||||
|
||||
|
||||
def reset_honcho_client() -> None:
|
||||
"""Reset the Honcho client singleton (useful for testing)."""
|
||||
global _honcho_client
|
||||
_honcho_client = None
|
||||
_honcho_client_slot.reset()
|
||||
|
|
|
|||
135
plugins/plugin_utils.py
Normal file
135
plugins/plugin_utils.py
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
"""Shared concurrency helpers for plugin authors.
|
||||
|
||||
The most common plugin footgun is the lazy process-wide singleton:
|
||||
|
||||
_client = None
|
||||
|
||||
def get_client():
|
||||
global _client
|
||||
if _client is not None:
|
||||
return _client
|
||||
_client = ExpensiveClient(...) # <-- TOCTOU: two threads both run this
|
||||
return _client
|
||||
|
||||
When two threads call ``get_client()`` before the singleton is set, both pass
|
||||
the ``is not None`` guard, both run the expensive initialization, and the
|
||||
second write clobbers the first — leaking whatever resource the first client
|
||||
opened (connections, file handles, background threads).
|
||||
|
||||
Multi-threaded agent sessions share one process (delegated tool calls,
|
||||
background workers, the self-improvement fork), so this race is reachable in
|
||||
practice. Rather than make every plugin author remember to hand-roll
|
||||
double-checked locking, this module gives them two thread-safe primitives:
|
||||
|
||||
* :func:`lazy_singleton` — decorator for the zero-arg accessor case.
|
||||
* :class:`SingletonSlot` — manual slot for accessors that build different
|
||||
instances depending on a config/key argument.
|
||||
|
||||
Both are import-light (stdlib ``threading`` only) so any plugin can import
|
||||
them without dragging in heavyweight host modules.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import functools
|
||||
import threading
|
||||
from typing import Callable, Generic, Optional, TypeVar
|
||||
|
||||
__all__ = ["lazy_singleton", "SingletonSlot"]
|
||||
|
||||
T = TypeVar("T")
|
||||
|
||||
|
||||
def lazy_singleton(factory: Callable[[], T]) -> Callable[[], T]:
|
||||
"""Wrap a zero-argument factory into a thread-safe lazy singleton accessor.
|
||||
|
||||
The wrapped callable returns the same instance on every call; the factory
|
||||
runs exactly once even under concurrent first calls, using double-checked
|
||||
locking. A ``.reset()`` attribute is attached for tests/teardown.
|
||||
|
||||
Example::
|
||||
|
||||
@lazy_singleton
|
||||
def get_client():
|
||||
return ExpensiveClient(load_config())
|
||||
|
||||
client = get_client() # built once, safe across threads
|
||||
get_client.reset() # drop the instance (next call rebuilds)
|
||||
|
||||
Note: if the factory raises, no instance is cached and the next call
|
||||
retries (the lock is released either way).
|
||||
"""
|
||||
lock = threading.Lock()
|
||||
box: list = [] # one-element [instance]; empty == not yet built
|
||||
|
||||
@functools.wraps(factory)
|
||||
def accessor() -> T:
|
||||
if box:
|
||||
return box[0]
|
||||
with lock:
|
||||
if box: # re-check inside the lock
|
||||
return box[0]
|
||||
instance = factory()
|
||||
box.append(instance)
|
||||
return instance
|
||||
|
||||
def reset() -> None:
|
||||
with lock:
|
||||
box.clear()
|
||||
|
||||
accessor.reset = reset # type: ignore[attr-defined]
|
||||
return accessor
|
||||
|
||||
|
||||
class SingletonSlot(Generic[T]):
|
||||
"""Thread-safe lazy slot for accessors that take a build argument.
|
||||
|
||||
Use this when the cached instance depends on a config/key passed to the
|
||||
accessor (so a bare zero-arg :func:`lazy_singleton` doesn't fit). The slot
|
||||
caches the first successfully-built instance and ignores the argument on
|
||||
subsequent calls — matching the established "first config wins" singleton
|
||||
semantics most plugins already rely on.
|
||||
|
||||
Example::
|
||||
|
||||
_slot: SingletonSlot[Honcho] = SingletonSlot()
|
||||
|
||||
def get_honcho_client(config=None):
|
||||
return _slot.get(lambda: Honcho(**resolve(config)))
|
||||
|
||||
def reset_honcho_client():
|
||||
_slot.reset()
|
||||
|
||||
The factory runs at most once even under concurrent first calls. If the
|
||||
factory raises, nothing is cached and the next call retries.
|
||||
"""
|
||||
|
||||
__slots__ = ("_lock", "_value", "_set")
|
||||
|
||||
def __init__(self) -> None:
|
||||
self._lock = threading.Lock()
|
||||
self._value: Optional[T] = None
|
||||
self._set = False
|
||||
|
||||
def get(self, factory: Callable[[], T]) -> T:
|
||||
# Fast path: already built, no lock needed (a set bool + ref read is
|
||||
# atomic under CPython's GIL).
|
||||
if self._set:
|
||||
return self._value # type: ignore[return-value]
|
||||
with self._lock:
|
||||
if self._set: # re-check inside the lock
|
||||
return self._value # type: ignore[return-value]
|
||||
value = factory()
|
||||
self._value = value
|
||||
self._set = True
|
||||
return value
|
||||
|
||||
def peek(self) -> Optional[T]:
|
||||
"""Return the cached instance without building it (None if unset)."""
|
||||
return self._value if self._set else None
|
||||
|
||||
def reset(self) -> None:
|
||||
"""Drop the cached instance so the next ``get()`` rebuilds it."""
|
||||
with self._lock:
|
||||
self._value = None
|
||||
self._set = False
|
||||
|
|
@ -291,6 +291,7 @@ def _build_payload(
|
|||
# ---------------------------------------------------------------------------
|
||||
|
||||
_fal_client: Any = None
|
||||
_fal_client_lock = threading.Lock()
|
||||
|
||||
|
||||
def _load_fal_client() -> Any:
|
||||
|
|
@ -298,13 +299,19 @@ def _load_fal_client() -> Any:
|
|||
|
||||
Delegates the actual import to :func:`tools.fal_common.import_fal_client`
|
||||
so the ``lazy_deps`` ensure-install handling stays in one place.
|
||||
|
||||
Thread-safe via double-checked locking: concurrent first calls import
|
||||
the SDK exactly once instead of each racing thread re-running the import.
|
||||
"""
|
||||
global _fal_client
|
||||
if _fal_client is not None:
|
||||
return _fal_client
|
||||
from tools.fal_common import import_fal_client
|
||||
_fal_client = import_fal_client()
|
||||
return _fal_client
|
||||
with _fal_client_lock:
|
||||
if _fal_client is not None: # re-check inside the lock
|
||||
return _fal_client
|
||||
from tools.fal_common import import_fal_client
|
||||
_fal_client = import_fal_client()
|
||||
return _fal_client
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
|
|
|
|||
|
|
@ -819,10 +819,15 @@ class TestResolveSessionNameLengthLimit:
|
|||
class TestResetHonchoClient:
|
||||
def test_reset_clears_singleton(self):
|
||||
import plugins.memory.honcho.client as mod
|
||||
mod._honcho_client = MagicMock()
|
||||
assert mod._honcho_client is not None
|
||||
|
||||
# Seed the cached client through the slot's public surface, then
|
||||
# verify reset_honcho_client() clears it. (The client is cached in
|
||||
# mod._honcho_client_slot, a thread-safe SingletonSlot, not a bare
|
||||
# module global anymore — see #24759.)
|
||||
mod._honcho_client_slot.get(lambda: MagicMock())
|
||||
assert mod._honcho_client_slot.peek() is not None
|
||||
reset_honcho_client()
|
||||
assert mod._honcho_client is None
|
||||
assert mod._honcho_client_slot.peek() is None
|
||||
|
||||
|
||||
class TestDialecticDepthParsing:
|
||||
|
|
|
|||
109
tests/test_honcho_client_concurrency.py
Normal file
109
tests/test_honcho_client_concurrency.py
Normal file
|
|
@ -0,0 +1,109 @@
|
|||
"""Concurrency test for get_honcho_client() — the TOCTOU race fix (#24759).
|
||||
|
||||
Proves the Honcho client is constructed exactly once even when many threads
|
||||
race the first call, by stubbing the SDK constructor and counting invocations.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import threading
|
||||
import types
|
||||
|
||||
import pytest
|
||||
|
||||
from plugins.memory.honcho import client as honcho_client
|
||||
from plugins.memory.honcho.client import (
|
||||
HonchoClientConfig,
|
||||
get_honcho_client,
|
||||
reset_honcho_client,
|
||||
)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def _reset_singleton():
|
||||
reset_honcho_client()
|
||||
yield
|
||||
reset_honcho_client()
|
||||
|
||||
|
||||
def _install_fake_honcho_sdk(monkeypatch, build_count, build_lock):
|
||||
"""Make `from honcho import Honcho` resolve to a counting fake."""
|
||||
|
||||
class _FakeHoncho:
|
||||
def __init__(self, **kwargs):
|
||||
with build_lock:
|
||||
build_count["n"] += 1
|
||||
import time
|
||||
time.sleep(0.01) # widen the race window
|
||||
self.kwargs = kwargs
|
||||
|
||||
fake_mod = types.ModuleType("honcho")
|
||||
fake_mod.Honcho = _FakeHoncho
|
||||
monkeypatch.setitem(sys.modules, "honcho", fake_mod)
|
||||
# Skip the lazy-install path entirely.
|
||||
monkeypatch.setattr(
|
||||
honcho_client, "_resolve_optional_float", lambda *a, **k: None, raising=False
|
||||
)
|
||||
|
||||
|
||||
def test_get_honcho_client_builds_once_under_concurrent_first_call(monkeypatch):
|
||||
build_count = {"n": 0}
|
||||
build_lock = threading.Lock()
|
||||
_install_fake_honcho_sdk(monkeypatch, build_count, build_lock)
|
||||
|
||||
config = HonchoClientConfig(
|
||||
api_key="test-key",
|
||||
workspace_id="ws",
|
||||
environment="production",
|
||||
)
|
||||
|
||||
barrier = threading.Barrier(20)
|
||||
results = []
|
||||
results_lock = threading.Lock()
|
||||
|
||||
def worker():
|
||||
barrier.wait()
|
||||
c = get_honcho_client(config)
|
||||
with results_lock:
|
||||
results.append(c)
|
||||
|
||||
threads = [threading.Thread(target=worker) for _ in range(20)]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
assert build_count["n"] == 1, "Honcho client must be constructed exactly once"
|
||||
assert len(results) == 20
|
||||
assert all(r is results[0] for r in results), "all threads share one client"
|
||||
|
||||
|
||||
def test_reset_allows_rebuild(monkeypatch):
|
||||
build_count = {"n": 0}
|
||||
build_lock = threading.Lock()
|
||||
_install_fake_honcho_sdk(monkeypatch, build_count, build_lock)
|
||||
|
||||
config = HonchoClientConfig(
|
||||
api_key="test-key", workspace_id="ws", environment="production"
|
||||
)
|
||||
|
||||
c1 = get_honcho_client(config)
|
||||
assert build_count["n"] == 1
|
||||
# Cached: no rebuild.
|
||||
assert get_honcho_client(config) is c1
|
||||
assert build_count["n"] == 1
|
||||
|
||||
reset_honcho_client()
|
||||
c2 = get_honcho_client(config)
|
||||
assert build_count["n"] == 2
|
||||
assert c2 is not c1
|
||||
|
||||
|
||||
def test_missing_credentials_still_raises_before_build(monkeypatch):
|
||||
build_count = {"n": 0}
|
||||
build_lock = threading.Lock()
|
||||
_install_fake_honcho_sdk(monkeypatch, build_count, build_lock)
|
||||
|
||||
bad = HonchoClientConfig(api_key="", base_url="", workspace_id="ws")
|
||||
with pytest.raises(ValueError):
|
||||
get_honcho_client(bad)
|
||||
assert build_count["n"] == 0
|
||||
159
tests/test_plugin_utils.py
Normal file
159
tests/test_plugin_utils.py
Normal file
|
|
@ -0,0 +1,159 @@
|
|||
"""Tests for plugins/plugin_utils.py — thread-safe lazy singleton helpers.
|
||||
|
||||
These exercise the actual concurrency guarantee with real threads (not mocks):
|
||||
a barrier releases N threads simultaneously into the accessor, and we assert
|
||||
the factory ran exactly once.
|
||||
"""
|
||||
|
||||
import threading
|
||||
|
||||
import pytest
|
||||
|
||||
from plugins.plugin_utils import SingletonSlot, lazy_singleton
|
||||
|
||||
|
||||
# --- lazy_singleton -------------------------------------------------------
|
||||
|
||||
|
||||
def test_lazy_singleton_builds_once_and_returns_same_instance():
|
||||
calls = []
|
||||
|
||||
@lazy_singleton
|
||||
def get():
|
||||
calls.append(1)
|
||||
return object()
|
||||
|
||||
a = get()
|
||||
b = get()
|
||||
assert a is b
|
||||
assert len(calls) == 1
|
||||
|
||||
|
||||
def test_lazy_singleton_reset_rebuilds():
|
||||
counter = {"n": 0}
|
||||
|
||||
@lazy_singleton
|
||||
def get():
|
||||
counter["n"] += 1
|
||||
return counter["n"]
|
||||
|
||||
assert get() == 1
|
||||
assert get() == 1
|
||||
get.reset()
|
||||
assert get() == 2
|
||||
|
||||
|
||||
def test_lazy_singleton_factory_exception_not_cached():
|
||||
state = {"fail": True}
|
||||
|
||||
@lazy_singleton
|
||||
def get():
|
||||
if state["fail"]:
|
||||
raise RuntimeError("boom")
|
||||
return "ok"
|
||||
|
||||
with pytest.raises(RuntimeError):
|
||||
get()
|
||||
# First call raised → nothing cached → retry succeeds once we stop failing.
|
||||
state["fail"] = False
|
||||
assert get() == "ok"
|
||||
|
||||
|
||||
def test_lazy_singleton_concurrent_first_call_builds_once():
|
||||
build_count = {"n": 0}
|
||||
build_lock = threading.Lock()
|
||||
barrier = threading.Barrier(16)
|
||||
results = []
|
||||
results_lock = threading.Lock()
|
||||
|
||||
@lazy_singleton
|
||||
def get():
|
||||
# Count builds under a lock so the assertion is exact even if the
|
||||
# double-checked lock had a bug and let two through.
|
||||
with build_lock:
|
||||
build_count["n"] += 1
|
||||
# Simulate an expensive build so threads genuinely overlap.
|
||||
import time
|
||||
time.sleep(0.01)
|
||||
return object()
|
||||
|
||||
def worker():
|
||||
barrier.wait() # release all threads at once
|
||||
obj = get()
|
||||
with results_lock:
|
||||
results.append(obj)
|
||||
|
||||
threads = [threading.Thread(target=worker) for _ in range(16)]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
assert build_count["n"] == 1, "factory must run exactly once under race"
|
||||
assert len(results) == 16
|
||||
assert all(r is results[0] for r in results), "all callers share one instance"
|
||||
|
||||
|
||||
# --- SingletonSlot --------------------------------------------------------
|
||||
|
||||
|
||||
def test_slot_caches_first_value():
|
||||
slot: SingletonSlot = SingletonSlot()
|
||||
assert slot.peek() is None
|
||||
v1 = slot.get(lambda: "first")
|
||||
assert slot.peek() == "first"
|
||||
# Subsequent factory is ignored — first value wins.
|
||||
v2 = slot.get(lambda: "second")
|
||||
assert v1 == v2 == "first"
|
||||
|
||||
|
||||
def test_slot_reset():
|
||||
slot: SingletonSlot = SingletonSlot()
|
||||
slot.get(lambda: "a")
|
||||
slot.reset()
|
||||
assert slot.peek() is None
|
||||
assert slot.get(lambda: "b") == "b"
|
||||
|
||||
|
||||
def test_slot_factory_exception_not_cached():
|
||||
slot: SingletonSlot = SingletonSlot()
|
||||
|
||||
def boom():
|
||||
raise ValueError("nope")
|
||||
|
||||
with pytest.raises(ValueError):
|
||||
slot.get(boom)
|
||||
assert slot.peek() is None
|
||||
assert slot.get(lambda: "recovered") == "recovered"
|
||||
|
||||
|
||||
def test_slot_concurrent_first_call_builds_once():
|
||||
build_count = {"n": 0}
|
||||
build_lock = threading.Lock()
|
||||
barrier = threading.Barrier(16)
|
||||
slot: SingletonSlot = SingletonSlot()
|
||||
results = []
|
||||
results_lock = threading.Lock()
|
||||
|
||||
def factory():
|
||||
with build_lock:
|
||||
build_count["n"] += 1
|
||||
import time
|
||||
time.sleep(0.01)
|
||||
return object()
|
||||
|
||||
def worker():
|
||||
barrier.wait()
|
||||
obj = slot.get(factory)
|
||||
with results_lock:
|
||||
results.append(obj)
|
||||
|
||||
threads = [threading.Thread(target=worker) for _ in range(16)]
|
||||
for t in threads:
|
||||
t.start()
|
||||
for t in threads:
|
||||
t.join()
|
||||
|
||||
assert build_count["n"] == 1
|
||||
assert len(results) == 16
|
||||
assert all(r is results[0] for r in results)
|
||||
|
|
@ -488,6 +488,53 @@ When `security.allow_lazy_installs: false` is set globally, `ensure()` raises `F
|
|||
|
||||
|
||||
|
||||
### Thread-safe lazy singletons
|
||||
|
||||
Plugins often cache an expensive object — an SDK client, an HTTP session, a connection pool — in a module-level variable built on first use:
|
||||
|
||||
```python
|
||||
_client = None
|
||||
|
||||
def get_client():
|
||||
global _client
|
||||
if _client is not None:
|
||||
return _client
|
||||
_client = ExpensiveClient(...) # ← TOCTOU race
|
||||
return _client
|
||||
```
|
||||
|
||||
This is a footgun. Hermes runs multiple threads in one process (delegated tool calls, background workers, the self-improvement fork), so two threads can hit `get_client()` before `_client` is set, **both** pass the `is not None` check, **both** run the expensive build, and the second write clobbers the first — leaking whatever resource the loser opened (connection, file handle, background thread).
|
||||
|
||||
Don't hand-roll the lock. Use the helpers in `plugins/plugin_utils.py`:
|
||||
|
||||
```python
|
||||
from plugins.plugin_utils import lazy_singleton, SingletonSlot
|
||||
|
||||
# Zero-arg accessor → decorate it:
|
||||
@lazy_singleton
|
||||
def get_client():
|
||||
return ExpensiveClient(load_config()) # runs exactly once
|
||||
|
||||
client = get_client() # safe across threads
|
||||
get_client.reset() # drop the instance (tests / teardown)
|
||||
|
||||
|
||||
# Accessor that takes a build argument → use a slot:
|
||||
_slot: SingletonSlot = SingletonSlot()
|
||||
|
||||
def get_client(config=None):
|
||||
return _slot.get(lambda: ExpensiveClient(resolve(config)))
|
||||
|
||||
def reset_client():
|
||||
_slot.reset()
|
||||
```
|
||||
|
||||
Both serialize concurrent first calls with double-checked locking and run the factory at most once. If the factory raises, nothing is cached and the next call retries. The honcho memory plugin (`plugins/memory/honcho/client.py`) is the reference consumer.
|
||||
|
||||
> Rule of thumb: any time you write `global _something` followed by a `is None` check and a build, reach for one of these instead.
|
||||
|
||||
|
||||
|
||||
### Conditional tool availability
|
||||
|
||||
For tools that depend on optional libraries:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue