mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-29 01:31:41 +00:00
fix: clean up API server — remove dead code, deduplicate model resolution, cache streaming config, add setup integration and security docs
- Remove unused _write_sse_chat_completion pseudo-streaming method (dead code) - Extract _resolve_model() helper in gateway/run.py, use from api_server - Cache streaming config at GatewayRunner init instead of YAML parsing per-message - Add API_SERVER_* env vars to OPTIONAL_ENV_VARS for hermes setup integration - Add security warning about network exposure without API_SERVER_KEY
This commit is contained in:
parent
d54280ea03
commit
b800e63137
4 changed files with 100 additions and 86 deletions
|
|
@ -182,26 +182,10 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
base_url, etc. from config.yaml / env vars.
|
||||
"""
|
||||
from run_agent import AIAgent
|
||||
from gateway.run import _resolve_runtime_agent_kwargs
|
||||
from gateway.run import _resolve_runtime_agent_kwargs, _resolve_model
|
||||
|
||||
runtime_kwargs = _resolve_runtime_agent_kwargs()
|
||||
|
||||
# Read model from env/config (same as gateway run.py)
|
||||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
||||
try:
|
||||
import yaml
|
||||
from pathlib import Path
|
||||
config_yaml_path = Path.home() / ".hermes" / "config.yaml"
|
||||
if config_yaml_path.exists():
|
||||
with open(config_yaml_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, str):
|
||||
model = model_cfg
|
||||
elif isinstance(model_cfg, dict):
|
||||
model = model_cfg.get("default", model)
|
||||
except Exception:
|
||||
pass
|
||||
model = _resolve_model()
|
||||
|
||||
max_iterations = int(os.getenv("HERMES_MAX_ITERATIONS", "90"))
|
||||
|
||||
|
|
@ -432,54 +416,6 @@ class APIServerAdapter(BasePlatformAdapter):
|
|||
|
||||
return response
|
||||
|
||||
async def _write_sse_chat_completion(
    self, request: "web.Request", completion_id: str, model: str,
    created: int, content: str, usage: Dict[str, int],
) -> "web.StreamResponse":
    """Send a finished chat completion as SSE chunks (pseudo-streaming).

    Emits three chunk events — assistant role, the full content as one
    delta, and a finish chunk carrying usage — followed by the [DONE]
    sentinel. Not true token-by-token streaming, but compatible with
    SSE-only clients such as Open WebUI.
    """
    response = web.StreamResponse(
        status=200,
        headers={"Content-Type": "text/event-stream", "Cache-Control": "no-cache"},
    )
    await response.prepare(request)

    # Fields shared by every chunk in this completion.
    envelope = {
        "id": completion_id, "object": "chat.completion.chunk",
        "created": created, "model": model,
    }

    async def emit(payload):
        # One SSE event per JSON chunk, on a "data:" line.
        await response.write(f"data: {json.dumps(payload)}\n\n".encode())

    # 1) Announce the assistant role.
    await emit({
        **envelope,
        "choices": [{"index": 0, "delta": {"role": "assistant"}, "finish_reason": None}],
    })

    # 2) Deliver the entire response body in a single content delta.
    await emit({
        **envelope,
        "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}],
    })

    # 3) Close out with finish_reason and token accounting.
    await emit({
        **envelope,
        "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
        "usage": {
            "prompt_tokens": usage.get("input_tokens", 0),
            "completion_tokens": usage.get("output_tokens", 0),
            "total_tokens": usage.get("total_tokens", 0),
        },
    })
    await response.write(b"data: [DONE]\n\n")

    return response
|
||||
|
||||
async def _handle_responses(self, request: "web.Request") -> "web.Response":
|
||||
"""POST /v1/responses — OpenAI Responses API format."""
|
||||
auth_err = self._check_auth(request)
|
||||
|
|
|
|||
|
|
@ -166,6 +166,28 @@ from gateway.platforms.base import BasePlatformAdapter, MessageEvent, MessageTyp
|
|||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _resolve_model() -> str:
|
||||
"""Resolve the model name from env vars and config.yaml.
|
||||
|
||||
Priority: HERMES_MODEL env > LLM_MODEL env > config.yaml model.default > fallback.
|
||||
"""
|
||||
model = os.getenv("HERMES_MODEL") or os.getenv("LLM_MODEL") or "anthropic/claude-opus-4.6"
|
||||
try:
|
||||
import yaml
|
||||
_cfg_path = Path.home() / ".hermes" / "config.yaml"
|
||||
if _cfg_path.exists():
|
||||
with open(_cfg_path, encoding="utf-8") as f:
|
||||
cfg = yaml.safe_load(f) or {}
|
||||
model_cfg = cfg.get("model", {})
|
||||
if isinstance(model_cfg, str):
|
||||
model = model_cfg
|
||||
elif isinstance(model_cfg, dict):
|
||||
model = model_cfg.get("default", model)
|
||||
except Exception:
|
||||
pass
|
||||
return model
|
||||
|
||||
|
||||
def _resolve_runtime_agent_kwargs() -> dict:
|
||||
"""Resolve provider credentials for gateway-created AIAgent instances."""
|
||||
from hermes_cli.runtime_provider import (
|
||||
|
|
@ -207,6 +229,7 @@ class GatewayRunner:
|
|||
self._reasoning_config = self._load_reasoning_config()
|
||||
self._provider_routing = self._load_provider_routing()
|
||||
self._fallback_model = self._load_fallback_model()
|
||||
self._streaming_config = self._load_streaming_config()
|
||||
|
||||
# Wire process registry into session store for reset protection
|
||||
from tools.process_registry import process_registry
|
||||
|
|
@ -461,6 +484,40 @@ class GatewayRunner:
|
|||
pass
|
||||
return None
|
||||
|
||||
@staticmethod
def _load_streaming_config() -> dict:
    """Read the 'streaming' section of config.yaml once at startup.

    Returns a dict such as {"enabled": False, "telegram": True, ...}.
    Per-platform keys override the global 'enabled' flag, and the
    HERMES_STREAMING_ENABLED env var trumps everything.
    """
    settings = {"enabled": False}
    try:
        import yaml

        candidate = _hermes_home / "config.yaml"
        if candidate.exists():
            with open(candidate, encoding="utf-8") as handle:
                parsed = yaml.safe_load(handle) or {}
            section = parsed.get("streaming", {})
            if isinstance(section, dict):
                settings = section
    except Exception:
        # Missing/unparseable config: keep streaming disabled by default.
        pass
    # Env var override beats anything read from the file.
    if os.getenv("HERMES_STREAMING_ENABLED", "").lower() in ("true", "1", "yes"):
        settings["enabled"] = True
    return settings
|
||||
|
||||
def _is_streaming_enabled(self, platform_key: str) -> bool:
|
||||
"""Check if streaming is enabled for a given platform."""
|
||||
cfg = self._streaming_config
|
||||
# Per-platform override
|
||||
if platform_key and cfg.get(platform_key) is not None:
|
||||
return str(cfg[platform_key]).lower() in ("true", "1", "yes")
|
||||
# Global default
|
||||
return str(cfg.get("enabled", False)).lower() in ("true", "1", "yes")
|
||||
|
||||
async def start(self) -> bool:
|
||||
"""
|
||||
Start the gateway and all configured platform adapters.
|
||||
|
|
@ -3084,25 +3141,8 @@ class GatewayRunner:
|
|||
_stream_q = None
|
||||
_stream_done = None
|
||||
_stream_msg_id = [None]
|
||||
_streaming_enabled = False
|
||||
|
||||
try:
|
||||
import yaml as _s_yaml
|
||||
_s_cfg_path = _hermes_home / "config.yaml"
|
||||
if _s_cfg_path.exists():
|
||||
with open(_s_cfg_path, encoding="utf-8") as _s_f:
|
||||
_s_data = _s_yaml.safe_load(_s_f) or {}
|
||||
_s_cfg = _s_data.get("streaming", {})
|
||||
if isinstance(_s_cfg, dict):
|
||||
_platform_key = source.platform.value if source.platform else ""
|
||||
if _platform_key and _s_cfg.get(_platform_key) is not None:
|
||||
_streaming_enabled = str(_s_cfg[_platform_key]).lower() in ("true", "1", "yes")
|
||||
else:
|
||||
_streaming_enabled = str(_s_cfg.get("enabled", False)).lower() in ("true", "1", "yes")
|
||||
except Exception:
|
||||
pass
|
||||
if os.getenv("HERMES_STREAMING_ENABLED", "").lower() in ("true", "1", "yes"):
|
||||
_streaming_enabled = True
|
||||
_platform_key = source.platform.value if source.platform else ""
|
||||
_streaming_enabled = self._is_streaming_enabled(_platform_key)
|
||||
|
||||
if _streaming_enabled:
|
||||
_stream_q = queue.Queue()
|
||||
|
|
|
|||
|
|
@ -494,6 +494,38 @@ OPTIONAL_ENV_VARS = {
|
|||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── API Server ──
|
||||
"API_SERVER_ENABLED": {
|
||||
"description": "Enable the OpenAI-compatible API server (true/false). Allows frontends like Open WebUI to connect.",
|
||||
"prompt": "Enable API server (true/false)",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
},
|
||||
"API_SERVER_KEY": {
|
||||
"description": "Bearer token for API server authentication. If not set, all requests are allowed (local-only use).",
|
||||
"prompt": "API server auth key (leave empty for no auth)",
|
||||
"url": None,
|
||||
"password": True,
|
||||
"category": "messaging",
|
||||
},
|
||||
"API_SERVER_PORT": {
|
||||
"description": "Port for the API server (default: 8642).",
|
||||
"prompt": "API server port",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
"API_SERVER_HOST": {
|
||||
"description": "Bind address for the API server (default: 127.0.0.1). Use 0.0.0.0 for network access (set API_SERVER_KEY!).",
|
||||
"prompt": "API server bind address",
|
||||
"url": None,
|
||||
"password": False,
|
||||
"category": "messaging",
|
||||
"advanced": True,
|
||||
},
|
||||
|
||||
# ── Agent settings ──
|
||||
"MESSAGING_CWD": {
|
||||
"description": "Working directory for terminal commands via messaging",
|
||||
|
|
|
|||
|
|
@ -165,11 +165,17 @@ This means you can customize behavior per-frontend without losing capabilities:
|
|||
Bearer token auth via the `Authorization` header:
|
||||
|
||||
```
|
||||
Authorization: Bearer your-secret-key
|
||||
Authorization: Bearer ***
|
||||
```
|
||||
|
||||
Configure the key via `API_SERVER_KEY` env var. If no key is set, all requests are allowed (for local-only use).
|
||||
|
||||
:::warning Security
|
||||
The API server gives full access to hermes-agent's toolset, **including terminal commands**. If you change the bind address to `0.0.0.0` (network-accessible), **always set `API_SERVER_KEY`** — without it, anyone on your network can execute arbitrary commands on your machine.
|
||||
|
||||
The default bind address (`127.0.0.1`) is safe for local-only use.
|
||||
:::
|
||||
|
||||
## Configuration
|
||||
|
||||
### Environment Variables
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue