mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-28 01:21:43 +00:00
fix(api-server): share one Docker container across all API conversations (#7127)
The API server's `_run_agent()` was not passing `task_id` to `run_conversation()`, so a fresh random UUID was generated for every request. This meant every Open WebUI message spun up a new Docker container and tore it down afterward — making persistent filesystem state impossible. Two fixes: 1. Pass `task_id="default"` so all API server conversations share the same Docker container (matching the design intent: one configured Docker environment, always the same container). 2. Derive a stable `session_id` from a hash of the system prompt and the first user message instead of using `uuid4()`. This stops `hermes sessions list` from being polluted with single-message throwaway sessions. Fixes #3438.
This commit is contained in:
parent
0b143f2ea3
commit
f4c7086035
2 changed files with 124 additions and 1 deletions
|
|
@ -20,6 +20,7 @@ Requires:
|
|||
"""
|
||||
|
||||
import asyncio
|
||||
import hashlib
|
||||
import hmac
|
||||
import json
|
||||
import logging
|
||||
|
|
@ -283,6 +284,24 @@ def _make_request_fingerprint(body: Dict[str, Any], keys: List[str]) -> str:
|
|||
return sha256(repr(subset).encode("utf-8")).hexdigest()
|
||||
|
||||
|
||||
def _derive_chat_session_id(
    system_prompt: Optional[str],
    first_user_message: str,
) -> str:
    """Derive a stable session ID from the system prompt and first user message.

    OpenAI-compatible frontends (Open WebUI, LibreChat, etc.) send the full
    conversation history with every request. The system prompt and first user
    message are constant across all turns of the same conversation, so hashing
    them produces a deterministic session ID that lets the API server reuse
    the same Hermes session (and therefore the same Docker container sandbox
    directory) across turns.

    The ID is purely a function of its two inputs: two separate conversations
    that start with the same system prompt and the same first user message
    yield the same ID.

    Args:
        system_prompt: The conversation's system prompt. ``None`` and the
            empty string are treated identically.
        first_user_message: Content of the first ``user`` message. ``None``
            is treated as an empty string; any other non-string value is
            formatted with ``str()`` before hashing.

    Returns:
        ``"api-"`` followed by the first 16 hex characters of the SHA-256
        digest of ``"<system_prompt>\\n<first_user_message>"``.
    """
    # A null message content must not be hashed as the literal text "None";
    # normalise it the same way a missing system prompt is normalised.
    if first_user_message is None:
        first_user_message = ""
    seed = f"{system_prompt or ''}\n{first_user_message}"
    # 16 hex characters (64 bits) keeps the ID short while staying stable.
    digest = hashlib.sha256(seed.encode("utf-8")).hexdigest()[:16]
    return f"api-{digest}"
|
||||
|
||||
|
||||
class APIServerAdapter(BasePlatformAdapter):
|
||||
"""
|
||||
OpenAI-compatible HTTP API server adapter.
|
||||
|
|
@ -590,7 +609,16 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
logger.warning("Failed to load session history for %s: %s", session_id, e)
|
||||
history = []
|
||||
else:
|
||||
session_id = str(uuid.uuid4())
|
||||
# Derive a stable session ID from the conversation fingerprint so
|
||||
# that consecutive messages from the same Open WebUI (or similar)
|
||||
# conversation map to the same Hermes session. The first user
|
||||
# message + system prompt are constant across all turns.
|
||||
first_user = ""
|
||||
for cm in conversation_messages:
|
||||
if cm.get("role") == "user":
|
||||
first_user = cm.get("content", "")
|
||||
break
|
||||
session_id = _derive_chat_session_id(system_prompt, first_user)
|
||||
# history already set from request body above
|
||||
|
||||
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
|
||||
|
|
@ -1366,6 +1394,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
result = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
task_id="default",
|
||||
)
|
||||
usage = {
|
||||
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
|
||||
|
|
@ -1532,6 +1561,7 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
r = agent.run_conversation(
|
||||
user_message=user_message,
|
||||
conversation_history=conversation_history,
|
||||
task_id="default",
|
||||
)
|
||||
u = {
|
||||
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue