Merge branch 'main' of github.com:NousResearch/hermes-agent into feat/ink-refactor

This commit is contained in:
Brooklyn Nicholson 2026-04-09 19:12:31 -05:00
commit aa5b697a9d
9 changed files with 277 additions and 14 deletions

View file

@ -901,6 +901,9 @@ def _apply_env_overrides(config: GatewayConfig) -> None:
pass
if api_server_host:
config.platforms[Platform.API_SERVER].extra["host"] = api_server_host
api_server_model_name = os.getenv("API_SERVER_MODEL_NAME", "")
if api_server_model_name:
config.platforms[Platform.API_SERVER].extra["model_name"] = api_server_model_name
# Webhook platform
webhook_enabled = os.getenv("WEBHOOK_ENABLED", "").lower() in ("true", "1", "yes")

View file

@ -299,6 +299,9 @@ class APIServerAdapter(BasePlatformAdapter):
self._cors_origins: tuple[str, ...] = self._parse_cors_origins(
extra.get("cors_origins", os.getenv("API_SERVER_CORS_ORIGINS", "")),
)
self._model_name: str = self._resolve_model_name(
extra.get("model_name", os.getenv("API_SERVER_MODEL_NAME", "")),
)
self._app: Optional["web.Application"] = None
self._runner: Optional["web.AppRunner"] = None
self._site: Optional["web.TCPSite"] = None
@ -324,6 +327,26 @@ class APIServerAdapter(BasePlatformAdapter):
return tuple(str(item).strip() for item in items if str(item).strip())
@staticmethod
def _resolve_model_name(explicit: str) -> str:
"""Derive the advertised model name for /v1/models.
Priority:
1. Explicit override (config extra or API_SERVER_MODEL_NAME env var)
2. Active profile name (so each profile advertises a distinct model)
3. Fallback: "hermes-agent"
"""
if explicit and explicit.strip():
return explicit.strip()
try:
from hermes_cli.profiles import get_active_profile_name
profile = get_active_profile_name()
if profile and profile not in ("default", "custom"):
return profile
except Exception:
pass
return "hermes-agent"
def _cors_headers_for_origin(self, origin: str) -> Optional[Dict[str, str]]:
"""Return CORS headers for an allowed browser origin."""
if not origin or not self._cors_origins:
@ -468,12 +491,12 @@ class APIServerAdapter(BasePlatformAdapter):
"object": "list",
"data": [
{
"id": "hermes-agent",
"id": self._model_name,
"object": "model",
"created": int(time.time()),
"owned_by": "hermes",
"permission": [],
"root": "hermes-agent",
"root": self._model_name,
"parent": None,
}
],
@ -546,7 +569,7 @@ class APIServerAdapter(BasePlatformAdapter):
# history already set from request body above
completion_id = f"chatcmpl-{uuid.uuid4().hex[:29]}"
model_name = body.get("model", "hermes-agent")
model_name = body.get("model", self._model_name)
created = int(time.time())
if stream:
@ -923,7 +946,7 @@ class APIServerAdapter(BasePlatformAdapter):
"object": "response",
"status": "completed",
"created_at": created_at,
"model": body.get("model", "hermes-agent"),
"model": body.get("model", self._model_name),
"output": output_items,
"usage": {
"input_tokens": usage.get("input_tokens", 0),
@ -1653,8 +1676,8 @@ class APIServerAdapter(BasePlatformAdapter):
self._mark_connected()
logger.info(
"[%s] API server listening on http://%s:%d",
self.name, self._host, self._port,
"[%s] API server listening on http://%s:%d (model: %s)",
self.name, self._host, self._port, self._model_name,
)
return True

View file

@ -1216,6 +1216,14 @@ OPTIONAL_ENV_VARS = {
"category": "messaging",
"advanced": True,
},
"API_SERVER_MODEL_NAME": {
"description": "Model name advertised on /v1/models. Defaults to the profile name (or 'hermes-agent' for the default profile). Useful for multi-user setups with OpenWebUI.",
"prompt": "API server model name",
"url": None,
"password": False,
"category": "messaging",
"advanced": True,
},
"WEBHOOK_ENABLED": {
"description": "Enable the webhook platform adapter for receiving events from GitHub, GitLab, etc.",
"prompt": "Enable webhooks (true/false)",

View file

@ -4584,20 +4584,31 @@ class AIAgent:
# Build mock response matching non-streaming shape
full_content = "".join(content_parts) or None
mock_tool_calls = None
has_truncated_tool_args = False
if tool_calls_acc:
mock_tool_calls = []
for idx in sorted(tool_calls_acc):
tc = tool_calls_acc[idx]
arguments = tc["function"]["arguments"]
if arguments and arguments.strip():
try:
json.loads(arguments)
except json.JSONDecodeError:
has_truncated_tool_args = True
mock_tool_calls.append(SimpleNamespace(
id=tc["id"],
type=tc["type"],
extra_content=tc.get("extra_content"),
function=SimpleNamespace(
name=tc["function"]["name"],
arguments=tc["function"]["arguments"],
arguments=arguments,
),
))
effective_finish_reason = finish_reason or "stop"
if has_truncated_tool_args:
effective_finish_reason = "length"
full_reasoning = "".join(reasoning_parts) or None
mock_message = SimpleNamespace(
role=role,
@ -4608,7 +4619,7 @@ class AIAgent:
mock_choice = SimpleNamespace(
index=0,
message=mock_message,
finish_reason=finish_reason or "stop",
finish_reason=effective_finish_reason,
)
return SimpleNamespace(
id="stream-" + str(uuid.uuid4()),
@ -7320,6 +7331,7 @@ class AIAgent:
interrupted = False
codex_ack_continuations = 0
length_continue_retries = 0
truncated_tool_call_retries = 0
truncated_response_prefix = ""
compression_attempts = 0
_turn_exit_reason = "unknown" # Diagnostic: why the loop ended
@ -7788,9 +7800,11 @@ class AIAgent:
# retries are pointless. Detect this early and give a
# targeted error instead of wasting 3 API calls.
_trunc_content = None
_trunc_has_tool_calls = False
if self.api_mode == "chat_completions":
_trunc_msg = response.choices[0].message if (hasattr(response, "choices") and response.choices) else None
_trunc_content = getattr(_trunc_msg, "content", None) if _trunc_msg else None
_trunc_has_tool_calls = bool(getattr(_trunc_msg, "tool_calls", None)) if _trunc_msg else False
elif self.api_mode == "anthropic_messages":
# Anthropic response.content is a list of blocks
_text_parts = []
@ -7800,9 +7814,11 @@ class AIAgent:
_trunc_content = "\n".join(_text_parts) if _text_parts else None
_thinking_exhausted = (
_trunc_content is not None
and not self._has_content_after_think_block(_trunc_content)
) or _trunc_content is None
not _trunc_has_tool_calls and (
(_trunc_content is not None and not self._has_content_after_think_block(_trunc_content))
or _trunc_content is None
)
)
if _thinking_exhausted:
_exhaust_error = (
@ -7878,6 +7894,34 @@ class AIAgent:
"error": "Response remained truncated after 3 continuation attempts",
}
if self.api_mode == "chat_completions":
assistant_message = response.choices[0].message
if assistant_message.tool_calls:
if truncated_tool_call_retries < 1:
truncated_tool_call_retries += 1
self._vprint(
f"{self.log_prefix}⚠️ Truncated tool call detected — retrying API call...",
force=True,
)
# Don't append the broken response to messages;
# just re-run the same API call from the current
# message state, giving the model another chance.
continue
self._vprint(
f"{self.log_prefix}⚠️ Truncated tool call response detected again — refusing to execute incomplete tool arguments.",
force=True,
)
self._cleanup_task_resources(effective_task_id)
self._persist_session(messages, conversation_history)
return {
"final_response": None,
"messages": messages,
"api_calls": api_call_count,
"completed": False,
"partial": True,
"error": "Response truncated due to output length limit",
}
# If we have prior messages, roll back to last complete state
if len(messages) > 1:
self._vprint(f"{self.log_prefix} ⏪ Rolling back to last complete assistant turn")

View file

@ -294,6 +294,40 @@ class TestModelsEndpoint:
assert data["data"][0]["id"] == "hermes-agent"
assert data["data"][0]["owned_by"] == "hermes"
@pytest.mark.asyncio
async def test_models_returns_profile_name(self):
"""When running under a named profile, /v1/models advertises the profile name."""
with patch("gateway.platforms.api_server.APIServerAdapter._resolve_model_name", return_value="lucas"):
adapter = _make_adapter()
app = _create_app(adapter)
async with TestClient(TestServer(app)) as cli:
resp = await cli.get("/v1/models")
assert resp.status == 200
data = await resp.json()
assert data["data"][0]["id"] == "lucas"
assert data["data"][0]["root"] == "lucas"
@pytest.mark.asyncio
async def test_models_returns_explicit_model_name(self):
"""Explicit model_name in config overrides profile name."""
extra = {"model_name": "my-custom-agent"}
config = PlatformConfig(enabled=True, extra=extra)
adapter = APIServerAdapter(config)
assert adapter._model_name == "my-custom-agent"
def test_resolve_model_name_explicit(self):
assert APIServerAdapter._resolve_model_name("my-bot") == "my-bot"
def test_resolve_model_name_default_profile(self):
"""Default profile falls back to 'hermes-agent'."""
with patch("hermes_cli.profiles.get_active_profile_name", return_value="default"):
assert APIServerAdapter._resolve_model_name("") == "hermes-agent"
def test_resolve_model_name_named_profile(self):
"""Named profile uses the profile name as model name."""
with patch("hermes_cli.profiles.get_active_profile_name", return_value="lucas"):
assert APIServerAdapter._resolve_model_name("") == "lucas"
@pytest.mark.asyncio
async def test_models_requires_auth(self, auth_adapter):
app = _create_app(auth_adapter)

View file

@ -1949,6 +1949,68 @@ class TestRunConversation:
assert result["final_response"] is not None
assert "Thinking Budget Exhausted" in result["final_response"]
def test_length_with_tool_calls_returns_partial_without_executing_tools(self, agent):
self._setup_agent(agent)
bad_tc = _mock_tool_call(
name="write_file",
arguments='{"path":"report.md","content":"partial',
call_id="c1",
)
resp = _mock_response(content="", finish_reason="length", tool_calls=[bad_tc])
agent.client.chat.completions.create.return_value = resp
with (
patch("run_agent.handle_function_call") as mock_handle_function_call,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
result = agent.run_conversation("write the report")
assert result["completed"] is False
assert result["partial"] is True
assert "truncated due to output length limit" in result["error"]
mock_handle_function_call.assert_not_called()
def test_truncated_tool_call_retries_once_before_refusing(self, agent):
"""When tool call args are truncated, the agent retries the API call
once. If the retry succeeds (valid JSON args), tool execution proceeds."""
self._setup_agent(agent)
agent.valid_tool_names.add("write_file")
bad_tc = _mock_tool_call(
name="write_file",
arguments='{"path":"report.md","content":"partial',
call_id="c1",
)
truncated_resp = _mock_response(
content="", finish_reason="length", tool_calls=[bad_tc],
)
good_tc = _mock_tool_call(
name="write_file",
arguments='{"path":"report.md","content":"full content"}',
call_id="c2",
)
good_resp = _mock_response(
content="", finish_reason="stop", tool_calls=[good_tc],
)
with (
patch("run_agent.handle_function_call", return_value='{"success":true}') as mock_hfc,
patch.object(agent, "_persist_session"),
patch.object(agent, "_save_trajectory"),
patch.object(agent, "_cleanup_task_resources"),
):
# First call: truncated → retry. Second: valid → execute tool.
# Third: final text response.
final_resp = _mock_response(content="Done!", finish_reason="stop")
agent.client.chat.completions.create.side_effect = [
truncated_resp, good_resp, final_resp,
]
result = agent.run_conversation("write the report")
# Tool was executed on the retry (good_resp)
mock_hfc.assert_called_once()
assert result["final_response"] == "Done!"
class TestRetryExhaustion:
"""Regression: retry_count > max_retries was dead code (off-by-one).
@ -3082,6 +3144,20 @@ class TestStreamingApiCall:
assert tc[0].function.name == "search"
assert tc[1].function.name == "read"
def test_truncated_tool_call_args_upgrade_finish_reason_to_length(self, agent):
chunks = [
_make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "write_file", '{"path":"x.txt","content":"hel')]),
]
agent.client.chat.completions.create.return_value = iter(chunks)
resp = agent._interruptible_streaming_api_call({"messages": []})
tc = resp.choices[0].message.tool_calls
assert len(tc) == 1
assert tc[0].function.name == "write_file"
assert tc[0].function.arguments == '{"path":"x.txt","content":"hel'
assert resp.choices[0].finish_reason == "length"
def test_ollama_reused_index_separate_tool_calls(self, agent):
"""Ollama sends every tool call at index 0 with different ids.

View file

@ -261,6 +261,7 @@ For cloud sandbox backends, persistence is filesystem-oriented. `TERMINAL_LIFETI
| `API_SERVER_CORS_ORIGINS` | Comma-separated browser origins allowed to call the API server directly (for example `http://localhost:3000,http://127.0.0.1:3000`). Default: disabled. |
| `API_SERVER_PORT` | Port for the API server (default: `8642`) |
| `API_SERVER_HOST` | Host/bind address for the API server (default: `127.0.0.1`). Use `0.0.0.0` for network access only with `API_SERVER_KEY` and a narrow `API_SERVER_CORS_ORIGINS` allowlist. |
| `API_SERVER_MODEL_NAME` | Model name advertised on `/v1/models`. Defaults to the profile name (or `hermes-agent` for the default profile). Useful for multi-user setups where frontends like Open WebUI need distinct model names per connection. |
| `MESSAGING_CWD` | Working directory for terminal commands in messaging mode (default: `~`) |
| `GATEWAY_ALLOWED_USERS` | Comma-separated user IDs allowed across all platforms |
| `GATEWAY_ALLOW_ALL_USERS` | Allow all users without allowlists (`true`/`false`, default: `false`) |

View file

@ -152,7 +152,7 @@ Delete a stored response.
### GET /v1/models
Lists `hermes-agent` as an available model. Required by most frontends for model discovery.
Lists the agent as an available model. The advertised model name defaults to the [profile](/docs/user-guide/features/profiles) name (or `hermes-agent` for the default profile). Required by most frontends for model discovery.
### GET /health
@ -193,6 +193,7 @@ The default bind address (`127.0.0.1`) is for local-only use. Browser access is
| `API_SERVER_HOST` | `127.0.0.1` | Bind address (localhost only by default) |
| `API_SERVER_KEY` | _(none)_ | Bearer token for auth |
| `API_SERVER_CORS_ORIGINS` | _(none)_ | Comma-separated allowed browser origins |
| `API_SERVER_MODEL_NAME` | _(profile name)_ | Model name on `/v1/models`. Defaults to profile name, or `hermes-agent` for default profile. |
### config.yaml
@ -242,6 +243,36 @@ Any frontend that supports the OpenAI API format works. Tested/documented integr
| OpenAI Python SDK | — | `OpenAI(base_url="http://localhost:8642/v1")` |
| curl | — | Direct HTTP requests |
## Multi-User Setup with Profiles
To give multiple users their own isolated Hermes instance (separate config, memory, skills), use [profiles](/docs/user-guide/features/profiles):
```bash
# Create a profile per user
hermes profile create alice
hermes profile create bob
# Configure each profile's API server on a different port
hermes -p alice config set API_SERVER_ENABLED true
hermes -p alice config set API_SERVER_PORT 8643
hermes -p alice config set API_SERVER_KEY alice-secret
hermes -p bob config set API_SERVER_ENABLED true
hermes -p bob config set API_SERVER_PORT 8644
hermes -p bob config set API_SERVER_KEY bob-secret
# Start each profile's gateway
hermes -p alice gateway &
hermes -p bob gateway &
```
Each profile's API server automatically advertises the profile name as the model ID:
- `http://localhost:8643/v1/models` → model `alice`
- `http://localhost:8644/v1/models` → model `bob`
In Open WebUI, add each as a separate connection. The model dropdown shows `alice` and `bob` as distinct models, each backed by a fully isolated Hermes instance. See the [Open WebUI guide](/docs/user-guide/messaging/open-webui#multi-user-setup-with-profiles) for details.
## Limitations
- **Response storage** — stored responses (for `previous_response_id`) are persisted in SQLite and survive gateway restarts. Max 100 stored responses (LRU eviction).

View file

@ -60,7 +60,7 @@ docker run -d -p 3000:8080 \
### 4. Open the UI
Go to **http://localhost:3000**. Create your admin account (the first user becomes admin). You should see **hermes-agent** in the model dropdown. Start chatting!
Go to **http://localhost:3000**. Create your admin account (the first user becomes admin). You should see your agent in the model dropdown (named after your profile, or **hermes-agent** for the default profile). Start chatting!
## Docker Compose Setup
@ -106,7 +106,7 @@ If you prefer to configure the connection through the UI instead of environment
7. Click the **checkmark** to verify the connection
8. **Save**
The **hermes-agent** model should now appear in the model dropdown.
Your agent model should now appear in the model dropdown (named after your profile, or **hermes-agent** for the default profile).
:::warning
Environment variables only take effect on Open WebUI's **first launch**. After that, connection settings are stored in its internal database. To change them later, use the Admin UI or delete the Docker volume and start fresh.
@ -196,6 +196,49 @@ Hermes Agent may be executing multiple tool calls (reading files, running comman
Make sure your `OPENAI_API_KEY` in Open WebUI matches the `API_SERVER_KEY` in Hermes Agent.
## Multi-User Setup with Profiles
To run separate Hermes instances per user — each with their own config, memory, and skills — use [profiles](/docs/user-guide/features/profiles). Each profile runs its own API server on a different port and automatically advertises the profile name as the model in Open WebUI.
### 1. Create profiles and configure API servers
```bash
hermes profile create alice
hermes -p alice config set API_SERVER_ENABLED true
hermes -p alice config set API_SERVER_PORT 8643
hermes -p alice config set API_SERVER_KEY alice-secret
hermes profile create bob
hermes -p bob config set API_SERVER_ENABLED true
hermes -p bob config set API_SERVER_PORT 8644
hermes -p bob config set API_SERVER_KEY bob-secret
```
### 2. Start each gateway
```bash
hermes -p alice gateway &
hermes -p bob gateway &
```
### 3. Add connections in Open WebUI
In **Admin Settings****Connections****OpenAI API****Manage**, add one connection per profile:
| Connection | URL | API Key |
|-----------|-----|---------|
| Alice | `http://host.docker.internal:8643/v1` | `alice-secret` |
| Bob | `http://host.docker.internal:8644/v1` | `bob-secret` |
The model dropdown will show `alice` and `bob` as distinct models. You can assign models to Open WebUI users via the admin panel, giving each user their own isolated Hermes agent.
:::tip Custom Model Names
The model name defaults to the profile name. To override it, set `API_SERVER_MODEL_NAME` in the profile's `.env`:
```bash
hermes -p alice config set API_SERVER_MODEL_NAME "Alice's Agent"
```
:::
## Linux Docker (no Docker Desktop)
On Linux without Docker Desktop, `host.docker.internal` doesn't resolve by default. Options: