diff --git a/gateway/config.py b/gateway/config.py index a50c9331ca..e4f04d8911 100644 --- a/gateway/config.py +++ b/gateway/config.py @@ -901,6 +901,9 @@ def _apply_env_overrides(config: GatewayConfig) -> None: pass if api_server_host: config.platforms[Platform.API_SERVER].extra["host"] = api_server_host + api_server_model_name = os.getenv("API_SERVER_MODEL_NAME", "") + if api_server_model_name: + config.platforms[Platform.API_SERVER].extra["model_name"] = api_server_model_name # Webhook platform webhook_enabled = os.getenv("WEBHOOK_ENABLED", "").lower() in ("true", "1", "yes") diff --git a/gateway/platforms/api_server.py b/gateway/platforms/api_server.py index aafc1579af..132790e5bd 100644 --- a/gateway/platforms/api_server.py +++ b/gateway/platforms/api_server.py @@ -299,6 +299,9 @@ class APIServerAdapter(BasePlatformAdapter): self._cors_origins: tuple[str, ...] = self._parse_cors_origins( extra.get("cors_origins", os.getenv("API_SERVER_CORS_ORIGINS", "")), ) + self._model_name: str = self._resolve_model_name( + extra.get("model_name", os.getenv("API_SERVER_MODEL_NAME", "")), + ) self._app: Optional["web.Application"] = None self._runner: Optional["web.AppRunner"] = None self._site: Optional["web.TCPSite"] = None @@ -324,6 +327,26 @@ class APIServerAdapter(BasePlatformAdapter): return tuple(str(item).strip() for item in items if str(item).strip()) + @staticmethod + def _resolve_model_name(explicit: str) -> str: + """Derive the advertised model name for /v1/models. + + Priority: + 1. Explicit override (config extra or API_SERVER_MODEL_NAME env var) + 2. Active profile name (so each profile advertises a distinct model) + 3. 
Fallback: "hermes-agent" + """ + if explicit and explicit.strip(): + return explicit.strip() + try: + from hermes_cli.profiles import get_active_profile_name + profile = get_active_profile_name() + if profile and profile not in ("default", "custom"): + return profile + except Exception: + pass + return "hermes-agent" + def _cors_headers_for_origin(self, origin: str) -> Optional[Dict[str, str]]: """Return CORS headers for an allowed browser origin.""" if not origin or not self._cors_origins: @@ -468,12 +491,12 @@ class APIServerAdapter(BasePlatformAdapter): "object": "list", "data": [ { - "id": "hermes-agent", + "id": self._model_name, "object": "model", "created": int(time.time()), "owned_by": "hermes", "permission": [], - "root": "hermes-agent", + "root": self._model_name, "parent": None, } ], @@ -546,7 +569,7 @@ class APIServerAdapter(BasePlatformAdapter): # history already set from request body above completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}" - model_name = body.get("model", "hermes-agent") + model_name = body.get("model", self._model_name) created = int(time.time()) if stream: @@ -923,7 +946,7 @@ class APIServerAdapter(BasePlatformAdapter): "object": "response", "status": "completed", "created_at": created_at, - "model": body.get("model", "hermes-agent"), + "model": body.get("model", self._model_name), "output": output_items, "usage": { "input_tokens": usage.get("input_tokens", 0), @@ -1653,8 +1676,8 @@ class APIServerAdapter(BasePlatformAdapter): self._mark_connected() logger.info( - "[%s] API server listening on http://%s:%d", - self.name, self._host, self._port, + "[%s] API server listening on http://%s:%d (model: %s)", + self.name, self._host, self._port, self._model_name, ) return True diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 80dce6c048..6ae094e3f0 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -1216,6 +1216,14 @@ OPTIONAL_ENV_VARS = { "category": "messaging", "advanced": True, }, + "API_SERVER_MODEL_NAME": { 
+ "description": "Model name advertised on /v1/models. Defaults to the profile name (or 'hermes-agent' for the default profile). Useful for multi-user setups with OpenWebUI.", + "prompt": "API server model name", + "url": None, + "password": False, + "category": "messaging", + "advanced": True, + }, "WEBHOOK_ENABLED": { "description": "Enable the webhook platform adapter for receiving events from GitHub, GitLab, etc.", "prompt": "Enable webhooks (true/false)", diff --git a/run_agent.py b/run_agent.py index 751f7b398d..d05d8d09e0 100644 --- a/run_agent.py +++ b/run_agent.py @@ -4584,20 +4584,31 @@ class AIAgent: # Build mock response matching non-streaming shape full_content = "".join(content_parts) or None mock_tool_calls = None + has_truncated_tool_args = False if tool_calls_acc: mock_tool_calls = [] for idx in sorted(tool_calls_acc): tc = tool_calls_acc[idx] + arguments = tc["function"]["arguments"] + if arguments and arguments.strip(): + try: + json.loads(arguments) + except json.JSONDecodeError: + has_truncated_tool_args = True mock_tool_calls.append(SimpleNamespace( id=tc["id"], type=tc["type"], extra_content=tc.get("extra_content"), function=SimpleNamespace( name=tc["function"]["name"], - arguments=tc["function"]["arguments"], + arguments=arguments, ), )) + effective_finish_reason = finish_reason or "stop" + if has_truncated_tool_args: + effective_finish_reason = "length" + full_reasoning = "".join(reasoning_parts) or None mock_message = SimpleNamespace( role=role, @@ -4608,7 +4619,7 @@ class AIAgent: mock_choice = SimpleNamespace( index=0, message=mock_message, - finish_reason=finish_reason or "stop", + finish_reason=effective_finish_reason, ) return SimpleNamespace( id="stream-" + str(uuid.uuid4()), @@ -7320,6 +7331,7 @@ class AIAgent: interrupted = False codex_ack_continuations = 0 length_continue_retries = 0 + truncated_tool_call_retries = 0 truncated_response_prefix = "" compression_attempts = 0 _turn_exit_reason = "unknown" # Diagnostic: why the loop 
ended @@ -7788,9 +7800,11 @@ class AIAgent: # retries are pointless. Detect this early and give a # targeted error instead of wasting 3 API calls. _trunc_content = None + _trunc_has_tool_calls = False if self.api_mode == "chat_completions": _trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None _trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None + _trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False elif self.api_mode == "anthropic_messages": # Anthropic response.content is a list of blocks _text_parts = [] @@ -7800,9 +7814,11 @@ class AIAgent: _trunc_content = "\n".join(_text_parts) if _text_parts else None _thinking_exhausted = ( - _trunc_content is not None - and not self._has_content_after_think_block(_trunc_content) - ) or _trunc_content is None + not _trunc_has_tool_calls and ( + (_trunc_content is not None and not self._has_content_after_think_block(_trunc_content)) + or _trunc_content is None + ) + ) if _thinking_exhausted: _exhaust_error = ( @@ -7878,6 +7894,34 @@ class AIAgent: "error": "Response remained truncated after 3 continuation attempts", } + if self.api_mode == "chat_completions": + assistant_message = response.choices[0].message + if assistant_message.tool_calls: + if truncated_tool_call_retries < 1: + truncated_tool_call_retries += 1 + self._vprint( + f"{self.log_prefix}⚠️ Truncated tool call detected — retrying API call...", + force=True, + ) + # Don't append the broken response to messages; + # just re-run the same API call from the current + # message state, giving the model another chance. 
@pytest.mark.asyncio
async def test_models_returns_profile_name(self):
    """/v1/models advertises whatever name the adapter resolved at construction.

    The adapter is built while ``_resolve_model_name`` is patched to return
    "lucas"; both the ``id`` and ``root`` fields of the single listed model
    must carry that name.
    """
    resolver_path = "gateway.platforms.api_server.APIServerAdapter._resolve_model_name"
    with patch(resolver_path, return_value="lucas"):
        app = _create_app(_make_adapter())
        async with TestClient(TestServer(app)) as client:
            response = await client.get("/v1/models")
            assert response.status == 200
            payload = await response.json()
            model_entry = payload["data"][0]
            assert model_entry["id"] == "lucas"
            assert model_entry["root"] == "lucas"
def test_truncated_tool_call_retries_once_before_refusing(self, agent):
    """A truncated tool call is retried, and a valid retry gets executed.

    The mocked API returns three responses in order: one with
    ``finish_reason="length"`` whose tool-call arguments stop mid-JSON, one
    with complete arguments, and a final plain-text answer.  The tool
    handler must be invoked exactly once and the run must end with the
    final text.
    """
    self._setup_agent(agent)
    agent.valid_tool_names.add("write_file")

    truncated_call = _mock_tool_call(
        name="write_file",
        arguments='{"path":"report.md","content":"partial',
        call_id="c1",
    )
    complete_call = _mock_tool_call(
        name="write_file",
        arguments='{"path":"report.md","content":"full content"}',
        call_id="c2",
    )
    # Order matters: truncated -> retry, valid -> execute tool, text -> finish.
    scripted_responses = [
        _mock_response(content="", finish_reason="length", tool_calls=[truncated_call]),
        _mock_response(content="", finish_reason="stop", tool_calls=[complete_call]),
        _mock_response(content="Done!", finish_reason="stop"),
    ]

    with (
        patch("run_agent.handle_function_call", return_value='{"success":true}') as tool_handler,
        patch.object(agent, "_persist_session"),
        patch.object(agent, "_save_trajectory"),
        patch.object(agent, "_cleanup_task_resources"),
    ):
        agent.client.chat.completions.create.side_effect = scripted_responses
        outcome = agent.run_conversation("write the report")

    # Only the well-formed tool call from the retry may reach the handler.
    tool_handler.assert_called_once()
    assert outcome["final_response"] == "Done!"
def test_truncated_tool_call_args_upgrade_finish_reason_to_length(self, agent):
    """A stream ending mid-JSON in tool-call arguments is reported as "length".

    The single streamed chunk carries a tool-call delta whose arguments are
    not valid JSON.  The assembled response must keep the tool call and its
    raw arguments unchanged while reporting ``finish_reason == "length"``.
    """
    partial_args = '{"path":"x.txt","content":"hel'
    delta = _make_tc_delta(0, "call_1", "write_file", partial_args)
    stream = iter([_make_chunk(tool_calls=[delta])])
    agent.client.chat.completions.create.return_value = stream

    response = agent._interruptible_streaming_api_call({"messages": []})

    choice = response.choices[0]
    tool_calls = choice.message.tool_calls
    assert len(tool_calls) == 1
    only_call = tool_calls[0]
    assert only_call.function.name == "write_file"
    assert only_call.function.arguments == partial_args
    assert choice.finish_reason == "length"
| | `MESSAGING_CWD` | Working directory for terminal commands in messaging mode (default: `~`) | | `GATEWAY_ALLOWED_USERS` | Comma-separated user IDs allowed across all platforms | | `GATEWAY_ALLOW_ALL_USERS` | Allow all users without allowlists (`true`/`false`, default: `false`) | diff --git a/website/docs/user-guide/features/api-server.md b/website/docs/user-guide/features/api-server.md index 71732285e7..58ae201fae 100644 --- a/website/docs/user-guide/features/api-server.md +++ b/website/docs/user-guide/features/api-server.md @@ -152,7 +152,7 @@ Delete a stored response. ### GET /v1/models -Lists `hermes-agent` as an available model. Required by most frontends for model discovery. +Lists the agent as an available model. The advertised model name defaults to the [profile](/docs/user-guide/features/profiles) name (or `hermes-agent` for the default profile). Required by most frontends for model discovery. ### GET /health @@ -193,6 +193,7 @@ The default bind address (`127.0.0.1`) is for local-only use. Browser access is | `API_SERVER_HOST` | `127.0.0.1` | Bind address (localhost only by default) | | `API_SERVER_KEY` | _(none)_ | Bearer token for auth | | `API_SERVER_CORS_ORIGINS` | _(none)_ | Comma-separated allowed browser origins | +| `API_SERVER_MODEL_NAME` | _(profile name)_ | Model name on `/v1/models`. Defaults to profile name, or `hermes-agent` for default profile. | ### config.yaml @@ -242,6 +243,36 @@ Any frontend that supports the OpenAI API format works. 
Tested/documented integr | OpenAI Python SDK | — | `OpenAI(base_url="http://localhost:8642/v1")` | | curl | — | Direct HTTP requests | +## Multi-User Setup with Profiles + +To give multiple users their own isolated Hermes instance (separate config, memory, skills), use [profiles](/docs/user-guide/features/profiles): + +```bash +# Create a profile per user +hermes profile create alice +hermes profile create bob + +# Configure each profile's API server on a different port +hermes -p alice config set API_SERVER_ENABLED true +hermes -p alice config set API_SERVER_PORT 8643 +hermes -p alice config set API_SERVER_KEY alice-secret + +hermes -p bob config set API_SERVER_ENABLED true +hermes -p bob config set API_SERVER_PORT 8644 +hermes -p bob config set API_SERVER_KEY bob-secret + +# Start each profile's gateway +hermes -p alice gateway & +hermes -p bob gateway & +``` + +Each profile's API server automatically advertises the profile name as the model ID: + +- `http://localhost:8643/v1/models` → model `alice` +- `http://localhost:8644/v1/models` → model `bob` + +In Open WebUI, add each as a separate connection. The model dropdown shows `alice` and `bob` as distinct models, each backed by a fully isolated Hermes instance. See the [Open WebUI guide](/docs/user-guide/messaging/open-webui#multi-user-setup-with-profiles) for details. + ## Limitations - **Response storage** — stored responses (for `previous_response_id`) are persisted in SQLite and survive gateway restarts. Max 100 stored responses (LRU eviction). diff --git a/website/docs/user-guide/messaging/open-webui.md b/website/docs/user-guide/messaging/open-webui.md index 7d4eaee361..71860d367f 100644 --- a/website/docs/user-guide/messaging/open-webui.md +++ b/website/docs/user-guide/messaging/open-webui.md @@ -60,7 +60,7 @@ docker run -d -p 3000:8080 \ ### 4. Open the UI -Go to **http://localhost:3000**. Create your admin account (the first user becomes admin). You should see **hermes-agent** in the model dropdown. 
Start chatting! +Go to **http://localhost:3000**. Create your admin account (the first user becomes admin). You should see your agent in the model dropdown (named after your profile, or **hermes-agent** for the default profile). Start chatting! ## Docker Compose Setup @@ -106,7 +106,7 @@ If you prefer to configure the connection through the UI instead of environment 7. Click the **checkmark** to verify the connection 8. **Save** -The **hermes-agent** model should now appear in the model dropdown. +Your agent model should now appear in the model dropdown (named after your profile, or **hermes-agent** for the default profile). :::warning Environment variables only take effect on Open WebUI's **first launch**. After that, connection settings are stored in its internal database. To change them later, use the Admin UI or delete the Docker volume and start fresh. @@ -196,6 +196,49 @@ Hermes Agent may be executing multiple tool calls (reading files, running comman Make sure your `OPENAI_API_KEY` in Open WebUI matches the `API_SERVER_KEY` in Hermes Agent. +## Multi-User Setup with Profiles + +To run separate Hermes instances per user — each with their own config, memory, and skills — use [profiles](/docs/user-guide/features/profiles). Each profile runs its own API server on a different port and automatically advertises the profile name as the model in Open WebUI. + +### 1. Create profiles and configure API servers + +```bash +hermes profile create alice +hermes -p alice config set API_SERVER_ENABLED true +hermes -p alice config set API_SERVER_PORT 8643 +hermes -p alice config set API_SERVER_KEY alice-secret + +hermes profile create bob +hermes -p bob config set API_SERVER_ENABLED true +hermes -p bob config set API_SERVER_PORT 8644 +hermes -p bob config set API_SERVER_KEY bob-secret +``` + +### 2. Start each gateway + +```bash +hermes -p alice gateway & +hermes -p bob gateway & +``` + +### 3. 
Add connections in Open WebUI + +In **Admin Settings** → **Connections** → **OpenAI API** → **Manage**, add one connection per profile: + +| Connection | URL | API Key | +|-----------|-----|---------| +| Alice | `http://host.docker.internal:8643/v1` | `alice-secret` | +| Bob | `http://host.docker.internal:8644/v1` | `bob-secret` | + +The model dropdown will show `alice` and `bob` as distinct models. You can assign models to Open WebUI users via the admin panel, giving each user their own isolated Hermes agent. + +:::tip Custom Model Names +The model name defaults to the profile name. To override it, set `API_SERVER_MODEL_NAME` in the profile's `.env`: +```bash +hermes -p alice config set API_SERVER_MODEL_NAME "Alice's Agent" +``` +::: + ## Linux Docker (no Docker Desktop) On Linux without Docker Desktop, `host.docker.internal` doesn't resolve by default. Options: