feat(api-server): X-Hermes-Session-Key header for long-term memory scoping (#20199)

* feat(api-server): X-Hermes-Session-Key header for long-term memory scoping

API Server integrations (Open WebUI, custom web UIs) can now pass a stable
per-channel identifier via X-Hermes-Session-Key that scopes long-term memory
(Honcho, etc.) independently of the transcript-scoped X-Hermes-Session-Id.
This matches the native gateway's session_key / session_id split: one stable
key per assistant channel, many independent transcripts that rotate on /new.

- _create_agent and _run_agent accept gateway_session_key and pass it to
  AIAgent(gateway_session_key=...), which is already honored by the Honcho
  memory provider (plugins/memory/honcho/client.py resolve_session_name).
- New shared helper _parse_session_key_header applies the same API-key
  gate, control-character sanitization, and a 256-char length cap as the
  existing session-id header.
- All three agent endpoints honor the header: /v1/chat/completions,
  /v1/responses, /v1/runs. JSON and SSE responses echo it back.
- /v1/capabilities advertises session_key_header so clients can
  feature-detect.

Closes #20060.

Co-authored-by: Andy Stewart <lazycat.manatee@gmail.com>

* chore: AUTHOR_MAP entry for manateelazycat

---------

Co-authored-by: Andy Stewart <lazycat.manatee@gmail.com>
This commit is contained in:
Teknium 2026-05-05 05:34:47 -07:00 committed by GitHub
parent 436672de0e
commit fe8560fc12
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 310 additions and 5 deletions

View file

@@ -2563,3 +2563,185 @@ class TestSessionIdHeader:
call_kwargs = mock_run.call_args.kwargs
assert call_kwargs["conversation_history"] == []
assert call_kwargs["session_id"] == "some-session"
# ---------------------------------------------------------------------------
# X-Hermes-Session-Key header (long-term memory scoping)
# ---------------------------------------------------------------------------
class TestSessionKeyHeader:
    """Tests for the ``X-Hermes-Session-Key`` request header.

    The session key is a stable, per-channel identifier used to scope
    long-term memory (e.g. Honcho) separately from the transcript-scoped
    ``session_id``.  A third-party Web UI is expected to send one stable key
    per assistant channel while rotating ``session_id`` on /new, which
    mirrors the native gateway's session_key / session_id split.
    """

    @staticmethod
    def _agent_outcome():
        """Build a fresh ``(result, usage)`` pair for a mocked ``_run_agent``."""
        result = {"final_response": "ok", "messages": [], "api_calls": 1}
        usage = {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0}
        return result, usage

    @staticmethod
    def _chat_body():
        """Build the minimal chat-completions JSON payload shared by the tests."""
        return {"model": "hermes-agent", "messages": [{"role": "user", "content": "hi"}]}

    @pytest.mark.asyncio
    async def test_session_key_passed_to_agent_and_echoed(self, auth_adapter):
        """The header value reaches ``_run_agent`` as ``gateway_session_key`` and is echoed."""
        session_key = "webui:user-42"
        request_headers = {
            "X-Hermes-Session-Key": session_key,
            "Authorization": "Bearer sk-secret",
        }
        application = _create_app(auth_adapter)
        async with TestClient(TestServer(application)) as client:
            with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as run_mock:
                run_mock.return_value = self._agent_outcome()
                response = await client.post(
                    "/v1/chat/completions",
                    headers=request_headers,
                    json=self._chat_body(),
                )

            assert response.status == 200
            assert response.headers.get("X-Hermes-Session-Key") == session_key
            forwarded = run_mock.call_args.kwargs
            assert forwarded["gateway_session_key"] == session_key

    @pytest.mark.asyncio
    async def test_session_key_independent_of_session_id(self, auth_adapter):
        """Key and id headers coexist: the key scopes memory, the id scopes the transcript."""
        # Stub the session DB so the session-id lookup returns an empty history.
        session_db = MagicMock()
        session_db.get_messages_as_conversation.return_value = []
        auth_adapter._session_db = session_db

        request_headers = {
            "X-Hermes-Session-Key": "channel-abc",
            "X-Hermes-Session-Id": "transcript-xyz",
            "Authorization": "Bearer sk-secret",
        }
        application = _create_app(auth_adapter)
        async with TestClient(TestServer(application)) as client:
            with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as run_mock:
                run_mock.return_value = self._agent_outcome()
                response = await client.post(
                    "/v1/chat/completions",
                    headers=request_headers,
                    json=self._chat_body(),
                )

            assert response.status == 200
            assert response.headers.get("X-Hermes-Session-Key") == "channel-abc"
            assert response.headers.get("X-Hermes-Session-Id") == "transcript-xyz"
            forwarded = run_mock.call_args.kwargs
            assert forwarded["gateway_session_key"] == "channel-abc"
            assert forwarded["session_id"] == "transcript-xyz"

    @pytest.mark.asyncio
    async def test_session_key_absent_yields_none(self, auth_adapter):
        """Without the header, ``gateway_session_key`` is None and nothing is echoed."""
        application = _create_app(auth_adapter)
        async with TestClient(TestServer(application)) as client:
            with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as run_mock:
                run_mock.return_value = self._agent_outcome()
                response = await client.post(
                    "/v1/chat/completions",
                    headers={"Authorization": "Bearer sk-secret"},
                    json=self._chat_body(),
                )

            assert response.status == 200
            assert "X-Hermes-Session-Key" not in response.headers
            forwarded = run_mock.call_args.kwargs
            assert forwarded["gateway_session_key"] is None

    @pytest.mark.asyncio
    async def test_session_key_rejected_without_api_key(self, adapter):
        """With no API_SERVER_KEY, a caller-supplied memory scope is unsafe: expect 403."""
        application = _create_app(adapter)
        async with TestClient(TestServer(application)) as client:
            response = await client.post(
                "/v1/chat/completions",
                headers={"X-Hermes-Session-Key": "whatever"},
                json=self._chat_body(),
            )
            assert response.status == 403

    @pytest.mark.asyncio
    async def test_session_key_rejects_control_chars(self, auth_adapter):
        """Control characters in the header must be rejected by the server-side validator.

        The aiohttp client refuses to SEND a header containing CR/LF (that
        check fires before the request leaves the client), so this path is
        unreachable through TestClient.  The helper is called directly with
        a stand-in request object instead, bypassing client-side validation.
        """
        fake_request = MagicMock()
        fake_request.headers = {"X-Hermes-Session-Key": "bad\rvalue"}

        parsed_key, error = auth_adapter._parse_session_key_header(fake_request)

        assert parsed_key is None
        assert error is not None
        assert error.status == 400

    @pytest.mark.asyncio
    async def test_session_key_rejects_oversized(self, auth_adapter):
        """A session key longer than the length cap yields a 400."""
        too_long = "x" * 1000
        request_headers = {
            "X-Hermes-Session-Key": too_long,
            "Authorization": "Bearer sk-secret",
        }
        application = _create_app(auth_adapter)
        async with TestClient(TestServer(application)) as client:
            response = await client.post(
                "/v1/chat/completions",
                headers=request_headers,
                json=self._chat_body(),
            )
            assert response.status == 400

    @pytest.mark.asyncio
    async def test_session_key_threads_into_create_agent(self, auth_adapter):
        """End-to-end: ``_create_agent`` is invoked with the key as ``gateway_session_key``."""
        seen_kwargs = {}

        def _recording_factory(**kwargs):
            # Record what the adapter passed, then hand back a stub agent
            # exposing the attributes set below.
            seen_kwargs.update(kwargs)
            stub_agent = MagicMock()
            stub_agent.run_conversation.return_value = {"final_response": "ok", "messages": []}
            stub_agent.session_prompt_tokens = 0
            stub_agent.session_completion_tokens = 0
            stub_agent.session_total_tokens = 0
            return stub_agent

        request_headers = {
            "X-Hermes-Session-Key": "agent:main:webui:dm:user-7",
            "Authorization": "Bearer sk-secret",
        }
        application = _create_app(auth_adapter)
        async with TestClient(TestServer(application)) as client:
            with patch.object(auth_adapter, "_create_agent", side_effect=_recording_factory):
                response = await client.post(
                    "/v1/chat/completions",
                    headers=request_headers,
                    json=self._chat_body(),
                )

            assert response.status == 200
            # _create_agent must be called with gateway_session_key threaded through
            assert seen_kwargs.get("gateway_session_key") == "agent:main:webui:dm:user-7"

    @pytest.mark.asyncio
    async def test_responses_endpoint_accepts_session_key(self, auth_adapter):
        """The Responses API honors the same X-Hermes-Session-Key contract."""
        session_key = "webui:chan-1"
        request_headers = {
            "X-Hermes-Session-Key": session_key,
            "Authorization": "Bearer sk-secret",
        }
        payload = {"model": "hermes-agent", "input": "hello", "store": False}
        application = _create_app(auth_adapter)
        async with TestClient(TestServer(application)) as client:
            with patch.object(auth_adapter, "_run_agent", new_callable=AsyncMock) as run_mock:
                run_mock.return_value = self._agent_outcome()
                response = await client.post(
                    "/v1/responses",
                    headers=request_headers,
                    json=payload,
                )

            assert response.status == 200
            assert response.headers.get("X-Hermes-Session-Key") == session_key
            forwarded = run_mock.call_args.kwargs
            assert forwarded["gateway_session_key"] == session_key

    @pytest.mark.asyncio
    async def test_capabilities_advertises_session_key_header(self, adapter):
        """GET /v1/capabilities advertises the header so clients can feature-detect."""
        application = _create_app(adapter)
        async with TestClient(TestServer(application)) as client:
            response = await client.get("/v1/capabilities")
            assert response.status == 200
            body = await response.json()
            assert body["features"]["session_key_header"] == "X-Hermes-Session-Key"