mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
fix(api-server): share one Docker container across all API conversations (#7127)
The API server's _run_agent() was not passing task_id to run_conversation(), causing a fresh random UUID per request. This meant every Open WebUI message spun up a new Docker container and tore it down afterward — making persistent filesystem state impossible. Two fixes: 1. Pass task_id="default" so all API server conversations share the same Docker container (matching the design intent: one configured Docker environment, always the same container). 2. Derive a stable session_id from the system prompt + first user message hash instead of uuid4(). This stops hermes sessions list from being polluted with single-message throwaway sessions. Fixes #3438.
This commit is contained in:
parent
0b143f2ea3
commit
f4c7086035
2 changed files with 124 additions and 1 deletions
|
|
@ -26,6 +26,7 @@ from gateway.platforms.api_server import (
|
|||
APIServerAdapter,
|
||||
ResponseStore,
|
||||
_CORS_HEADERS,
|
||||
_derive_chat_session_id,
|
||||
check_api_server_requirements,
|
||||
cors_middleware,
|
||||
security_headers_middleware,
|
||||
|
|
@ -658,6 +659,98 @@ class TestChatCompletionsEndpoint:
|
|||
data = await resp.json()
|
||||
assert "Provider failed" in data["error"]["message"]
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_stable_session_id_across_turns(self, adapter):
|
||||
"""Same conversation (same first user message) produces the same session_id."""
|
||||
mock_result = {"final_response": "ok", "messages": [], "api_calls": 1}
|
||||
|
||||
app = _create_app(adapter)
|
||||
session_ids = []
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
# Turn 1: single user message
|
||||
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
|
||||
await cli.post(
|
||||
"/v1/chat/completions",
|
||||
json={
|
||||
"model": "hermes-agent",
|
||||
"messages": [{"role": "user", "content": "Hello"}],
|
||||
},
|
||||
)
|
||||
session_ids.append(mock_run.call_args.kwargs["session_id"])
|
||||
|
||||
# Turn 2: same first message, conversation grew
|
||||
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
|
||||
await cli.post(
|
||||
"/v1/chat/completions",
|
||||
json={
|
||||
"model": "hermes-agent",
|
||||
"messages": [
|
||||
{"role": "user", "content": "Hello"},
|
||||
{"role": "assistant", "content": "Hi there!"},
|
||||
{"role": "user", "content": "How are you?"},
|
||||
],
|
||||
},
|
||||
)
|
||||
session_ids.append(mock_run.call_args.kwargs["session_id"])
|
||||
|
||||
assert session_ids[0] == session_ids[1], "Session ID should be stable across turns"
|
||||
assert session_ids[0].startswith("api-"), "Derived session IDs should have api- prefix"
|
||||
|
||||
@pytest.mark.asyncio
|
||||
async def test_different_conversations_get_different_session_ids(self, adapter):
|
||||
"""Different first messages produce different session_ids."""
|
||||
mock_result = {"final_response": "ok", "messages": [], "api_calls": 1}
|
||||
|
||||
app = _create_app(adapter)
|
||||
session_ids = []
|
||||
async with TestClient(TestServer(app)) as cli:
|
||||
for first_msg in ["Hello", "Goodbye"]:
|
||||
with patch.object(adapter, "_run_agent", new_callable=AsyncMock) as mock_run:
|
||||
mock_run.return_value = (mock_result, {"input_tokens": 0, "output_tokens": 0, "total_tokens": 0})
|
||||
await cli.post(
|
||||
"/v1/chat/completions",
|
||||
json={
|
||||
"model": "hermes-agent",
|
||||
"messages": [{"role": "user", "content": first_msg}],
|
||||
},
|
||||
)
|
||||
session_ids.append(mock_run.call_args.kwargs["session_id"])
|
||||
|
||||
assert session_ids[0] != session_ids[1]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# _derive_chat_session_id unit tests
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
class TestDeriveChatSessionId:
|
||||
def test_deterministic(self):
|
||||
"""Same inputs always produce the same session ID."""
|
||||
a = _derive_chat_session_id("sys", "hello")
|
||||
b = _derive_chat_session_id("sys", "hello")
|
||||
assert a == b
|
||||
|
||||
def test_prefix(self):
|
||||
assert _derive_chat_session_id(None, "hi").startswith("api-")
|
||||
|
||||
def test_different_system_prompt(self):
|
||||
a = _derive_chat_session_id("You are a pirate.", "Hello")
|
||||
b = _derive_chat_session_id("You are a robot.", "Hello")
|
||||
assert a != b
|
||||
|
||||
def test_different_first_message(self):
|
||||
a = _derive_chat_session_id(None, "Hello")
|
||||
b = _derive_chat_session_id(None, "Goodbye")
|
||||
assert a != b
|
||||
|
||||
def test_none_system_prompt(self):
|
||||
"""None system prompt doesn't crash."""
|
||||
sid = _derive_chat_session_id(None, "test")
|
||||
assert isinstance(sid, str) and len(sid) > 4
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# /v1/responses endpoint
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue