fix(api-server): share one Docker container across all API conversations (#7127)

The API server's _run_agent() was not passing task_id to
run_conversation(), causing a fresh random UUID per request. This meant
every Open WebUI message spun up a new Docker container and tore it down
afterward — making persistent filesystem state impossible.

Two fixes:

1. Pass task_id="default" so all API server conversations share the same
   Docker container (matching the design intent: one configured Docker
   environment, always the same container).

2. Derive a stable session_id from a hash of the system prompt + first user
   message instead of uuid4(). This stops the `hermes sessions list` output
   from being polluted with single-message throwaway sessions.

Fixes #3438.
This commit is contained in:
Teknium 2026-04-10 04:56:35 -07:00 committed by GitHub
parent 0b143f2ea3
commit f4c7086035
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 124 additions and 1 deletions

View file

@ -20,6 +20,7 @@ Requires:
"""
import asyncio
import hashlib
import hmac
import json
import logging
@ -283,6 +284,24 @@ def _make_request_fingerprint(body: Dict[str, Any], keys: List[str]) -> str:
return sha256(repr(subset).encode("utf-8")).hexdigest()
def _derive_chat_session_id(
    system_prompt: Optional[str],
    first_user_message: str,
) -> str:
    """Build a deterministic session ID for an OpenAI-style conversation.

    Frontends such as Open WebUI and LibreChat resend the entire message
    history on every turn, so the system prompt and the first user message
    stay constant for the lifetime of a conversation. Hashing that pair
    therefore yields a stable identifier, which lets the API server keep
    reusing one Hermes session (and with it the same Docker container
    sandbox directory) instead of minting a fresh session per request.
    """
    # A missing system prompt hashes the same as an empty one.
    material = "\n".join(((system_prompt or ""), first_user_message))
    fingerprint = hashlib.sha256(material.encode("utf-8")).hexdigest()
    return "api-" + fingerprint[:16]
class APIServerAdapter(BasePlatformAdapter):
"""
OpenAI-compatible HTTP API server adapter.
@ -590,7 +609,16 @@ class APIServerAdapter(BasePlatformAdapter):
logger.warning("Failed to load session history for %s: %s", session_id, e)
history = []
else:
session_id = str(uuid.uuid4())
# Derive a stable session ID from the conversation fingerprint so
# that consecutive messages from the same Open WebUI (or similar)
# conversation map to the same Hermes session. The first user
# message + system prompt are constant across all turns.
first_user = ""
for cm in conversation_messages:
if cm.get("role") == "user":
first_user = cm.get("content", "")
break
session_id = _derive_chat_session_id(system_prompt, first_user)
# history already set from request body above
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
@ -1366,6 +1394,7 @@ class APIServerAdapter(BasePlatformAdapter):
result = agent.run_conversation(
user_message=user_message,
conversation_history=conversation_history,
task_id="default",
)
usage = {
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,
@ -1532,6 +1561,7 @@ class APIServerAdapter(BasePlatformAdapter):
r = agent.run_conversation(
user_message=user_message,
conversation_history=conversation_history,
task_id="default",
)
u = {
"input_tokens": getattr(agent, "session_prompt_tokens", 0) or 0,