feat(hindsight): probe API for update_mode='append' support, dedupe across processes

Mirrors the pattern already shipping in hindsight-integrations/openclaw:
probe `<api_url>/version` once per process, gate on Hindsight ≥ 0.5.0.
When supported, retains use a stable session-scoped `document_id`
(`session_id`) plus `update_mode='append'` so cross-process retains for
the same session merge into one document instead of producing N
duplicates, each stamped by a different process. When unsupported (or the probe
fails), fall back to the existing per-process unique
`f"{session_id}-{start_ts}"` document_id with no `update_mode` — the
resume-overwrite fix (#6654) keeps working unchanged on legacy servers.

Closes the dedup half of #20115. The proposed `document_id_strategy`
config knob isn't needed: auto-detection via the same /version probe
the OpenClaw plugin already uses gives the same outcome with no extra
config burden, and the choice is purely a function of what the server
can do.

Plumbing
--------
- Module-level helpers (`_meets_minimum_version`, `_fetch_hindsight_api_version`,
  `_check_api_supports_update_mode_append`) cache the result per api_url
  so every provider in the process gets one /version round-trip.
- One-time WARN logged when the API is older than 0.5.0, telling the
  user to upgrade for cross-session deduplication.
- New instance helper `_resolve_retain_target(fallback_doc_id)` returns
  `(document_id, update_mode)` based on cached capability. Wired into
  `sync_turn` and the `on_session_switch` flush path.
- For local_embedded mode, the probe URL is taken from the running
  client (`client.url`) so we hit the actual daemon port rather than
  the configured default.
- `update_mode` is set on the per-item dict; `aretain_batch` already
  threads `item['update_mode']` into the API call.

Tests
-----
- `TestUpdateModeAppendCapability` (5 cases): legacy fallback, modern
  stable+append, per-url cache, one-time warn, flush-on-switch resolves
  against the OLD session.
- Existing `_make_hindsight_provider` factory in the manager-side test
  file extended to seed `_mode`/`_api_url`/`_api_key`/`_client` and stub
  `_resolve_retain_target` so the bypass-init pattern keeps working.

E2E verified against installed `~/.hermes/hermes-agent`:
- Legacy probe (unreachable host) → `legacy-session-<ts>` doc_id,
  no `update_mode`.
- Modern probe (live local_embedded 0.5.6 daemon) → stable
  `modern-session` doc_id + `update_mode='append'`.
- `test_hermes_embedded_smoke.py` passes (90s).
This commit is contained in:
Nicolò Boschi 2026-05-05 14:46:22 +02:00 committed by Teknium
parent 1efed67056
commit 3082fa0829
3 changed files with 257 additions and 6 deletions

View file

@ -248,6 +248,14 @@ def _make_hindsight_provider():
provider._atexit_registered = True
provider._ensure_writer = lambda: None
provider._register_atexit = lambda: None
# Mode + API state used by _resolve_retain_target; stub the resolver
# so tests don't actually probe the API. Real probe behavior is
# exercised by tests in tests/plugins/memory/test_hindsight_provider.py.
provider._mode = "cloud"
provider._api_url = ""
provider._api_key = ""
provider._client = None
provider._resolve_retain_target = lambda fb: (fb, None)
# Stub the network-touching helper so any enqueued flush closure is
# a no-op if ever drained in a unit test.
provider._run_hindsight_operation = lambda _op: None

View file

@ -1072,6 +1072,110 @@ class TestSessionSwitchBufferFlush:
assert call_order[1] == "3"
# ---------------------------------------------------------------------------
# update_mode='append' capability probe + retain dispatch
# ---------------------------------------------------------------------------
class TestUpdateModeAppendCapability:
    """Checks for the update_mode='append' capability probe and retain dispatch."""

    def _clear_capability_cache(self):
        """Empty the module-level append-capability cache while holding its lock."""
        from plugins.memory.hindsight import (
            _append_capability_cache,
            _append_capability_lock,
        )

        with _append_capability_lock:
            _append_capability_cache.clear()

    def _patch_version_probe(self, monkeypatch, replacement):
        """Swap the module's version fetcher for ``replacement`` for this test."""
        monkeypatch.setattr(
            "plugins.memory.hindsight._fetch_hindsight_api_version",
            replacement,
        )

    @staticmethod
    def _sync_and_drain(target, user_text, assistant_text):
        """Submit one turn, then block until the retain queue has been joined."""
        target.sync_turn(user_text, assistant_text)
        target._retain_queue.join()

    def test_legacy_api_falls_back_to_per_process_doc_id(self, provider, monkeypatch):
        """With the version probe returning None, sync_turn must keep the
        provider's existing document id and leave update_mode off the item."""
        self._clear_capability_cache()
        self._patch_version_probe(monkeypatch, lambda *args, **kwargs: None)

        expected_doc_id = provider._document_id
        self._sync_and_drain(provider, "hello", "hi")

        retain_kwargs = provider._client.aretain_batch.call_args.kwargs
        sent_doc_id = retain_kwargs["document_id"]
        assert sent_doc_id == expected_doc_id
        assert sent_doc_id.startswith("test-session-")
        first_item = retain_kwargs["items"][0]
        assert "update_mode" not in first_item

    def test_modern_api_uses_stable_doc_id_with_append(self, provider, monkeypatch):
        """With the probe reporting 0.5.6, the retain targets the bare session
        id and the item carries update_mode='append'."""
        self._clear_capability_cache()
        self._patch_version_probe(monkeypatch, lambda *args, **kwargs: "0.5.6")

        self._sync_and_drain(provider, "hello", "hi")

        retain_kwargs = provider._client.aretain_batch.call_args.kwargs
        # The stable id is the session id alone, with no timestamp suffix.
        assert retain_kwargs["document_id"] == "test-session"
        first_item = retain_kwargs["items"][0]
        assert first_item["update_mode"] == "append"

    def test_capability_cached_per_url(self, provider, monkeypatch):
        """Two consecutive retains on one provider must trigger a single probe."""
        self._clear_capability_cache()
        probe_hits = []

        def _counting_probe(*args, **kwargs):
            probe_hits.append(1)
            return "0.5.6"

        self._patch_version_probe(monkeypatch, _counting_probe)

        for user_text, assistant_text in (("a", "b"), ("c", "d")):
            self._sync_and_drain(provider, user_text, assistant_text)

        assert len(probe_hits) == 1

    def test_legacy_warning_emitted_once(self, provider, monkeypatch, caplog):
        """A 0.4.22 server yields exactly one upgrade warning across two retains."""
        import logging

        self._clear_capability_cache()
        self._patch_version_probe(monkeypatch, lambda *args, **kwargs: "0.4.22")

        with caplog.at_level(logging.WARNING, logger="plugins.memory.hindsight"):
            for user_text, assistant_text in (("a", "b"), ("c", "d")):
                self._sync_and_drain(provider, user_text, assistant_text)

        upgrade_warnings = []
        for record in caplog.records:
            if record.levelno != logging.WARNING:
                continue
            if "older than 0.5.0" in record.getMessage():
                upgrade_warnings.append(record)
        # The second retain is expected to hit the cache, so no repeat warning.
        assert len(upgrade_warnings) == 1

    def test_session_switch_flush_picks_capability_against_old_session(
        self, provider_with_config, monkeypatch
    ):
        """With append supported, the flush triggered by a reset switch must
        target the previous session's stable document with update_mode='append'."""
        self._clear_capability_cache()
        self._patch_version_probe(monkeypatch, lambda *args, **kwargs: "0.5.6")

        switched = provider_with_config(retain_every_n_turns=3, retain_async=False)
        # Two turns stay below the configured retain_every_n_turns=3 threshold.
        switched.sync_turn("turn1-user", "turn1-asst")
        switched.sync_turn("turn2-user", "turn2-asst")
        switched.on_session_switch(
            "new-sid", parent_session_id="test-session", reset=True
        )
        switched._retain_queue.join()

        retain_kwargs = switched._client.aretain_batch.call_args.kwargs
        # The flushed turns belong to the old session, not to "new-sid".
        assert retain_kwargs["document_id"] == "test-session"
        assert retain_kwargs["items"][0]["update_mode"] == "append"
# ---------------------------------------------------------------------------
# System prompt tests
# ---------------------------------------------------------------------------