mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
feat(proxy): local OpenAI-compatible proxy for OAuth providers (#25969)
Adds 'hermes proxy start' — a local HTTP server that lets external apps (OpenViking, Karakeep, Open WebUI, ...) use a Hermes-managed provider subscription as their LLM endpoint. The proxy attaches the user's real OAuth-resolved credentials to each forwarded request, refreshing them automatically; the client can send any bearer (it gets stripped). Ships with one adapter — Nous Portal. The UpstreamAdapter ABC and registry in hermes_cli/proxy/adapters/ are designed for additional OAuth providers to plug in by name without server changes. Commands: hermes proxy start [--provider nous] [--host 127.0.0.1] [--port 8645] hermes proxy status hermes proxy providers Allowed Portal paths: /v1/chat/completions, /v1/completions, /v1/embeddings, /v1/models. Anything else returns 404 with a clear error pointing at the allowed list. aiohttp is gated like gateway/platforms/api_server.py (try-import, clean runtime error if missing). No new core dependency. Tests: 24 unit tests + 1 separate E2E that spawns the real subprocess and verifies the upstream receives the right bearer with the client's header stripped.
This commit is contained in:
parent
34fc94d1f4
commit
ccb5aae0d2
11 changed files with 1466 additions and 1 deletions
|
|
@ -1452,6 +1452,17 @@ def cmd_gateway(args):
|
|||
gateway_command(args)
|
||||
|
||||
|
||||
def cmd_proxy(args):
|
||||
"""Local OpenAI-compatible proxy to OAuth providers."""
|
||||
# Lazy import — pulls in aiohttp, which is gated behind an extras install
|
||||
# for users who don't run the proxy or the messaging gateway.
|
||||
from hermes_cli.proxy.cli import cmd_proxy as _cmd_proxy
|
||||
|
||||
rc = _cmd_proxy(args)
|
||||
if isinstance(rc, int) and rc != 0:
|
||||
raise SystemExit(rc)
|
||||
|
||||
|
||||
def cmd_whatsapp(args):
|
||||
"""Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
|
||||
_require_tty("whatsapp")
|
||||
|
|
@ -9385,7 +9396,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
|
|||
"config", "cron", "curator", "dashboard", "debug", "doctor",
|
||||
"dump", "fallback", "gateway", "hooks", "import", "insights",
|
||||
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
|
||||
"model", "pairing", "plugins", "profile", "sessions", "setup",
|
||||
"model", "pairing", "plugins", "profile", "proxy", "sessions", "setup",
|
||||
"skills", "slack", "status", "tools", "uninstall", "update",
|
||||
"version", "webhook", "whatsapp", "chat",
|
||||
# Help-ish invocations — plugin commands not being listed in
|
||||
|
|
@ -9727,6 +9738,51 @@ def main():
|
|||
help="Skip the confirmation prompt",
|
||||
)
|
||||
|
||||
# =========================================================================
|
||||
# proxy command — local OpenAI-compatible proxy that attaches the user's
|
||||
# OAuth-authenticated provider credentials to outbound requests. Lets
|
||||
# external apps (OpenViking, Karakeep, Open WebUI, ...) ride a logged-in
|
||||
# subscription without copy-pasting static API keys.
|
||||
# =========================================================================
|
||||
proxy_parser = subparsers.add_parser(
|
||||
"proxy",
|
||||
help="Local OpenAI-compatible proxy to OAuth providers",
|
||||
description=(
|
||||
"Run a local HTTP server that forwards OpenAI-compatible requests "
|
||||
"to an OAuth-authenticated provider (e.g. Nous Portal). External "
|
||||
"apps can point at the proxy with any bearer token; the proxy "
|
||||
"attaches your real credentials."
|
||||
),
|
||||
)
|
||||
proxy_subparsers = proxy_parser.add_subparsers(dest="proxy_command")
|
||||
|
||||
proxy_start = proxy_subparsers.add_parser(
|
||||
"start", help="Run the proxy in the foreground"
|
||||
)
|
||||
proxy_start.add_argument(
|
||||
"--provider",
|
||||
default="nous",
|
||||
help="Upstream provider (default: nous). See `hermes proxy providers`.",
|
||||
)
|
||||
proxy_start.add_argument(
|
||||
"--host",
|
||||
default=None,
|
||||
help="Bind address (default: 127.0.0.1). Use 0.0.0.0 to expose on LAN.",
|
||||
)
|
||||
proxy_start.add_argument(
|
||||
"--port",
|
||||
type=int,
|
||||
default=None,
|
||||
help="Bind port (default: 8645)",
|
||||
)
|
||||
|
||||
proxy_subparsers.add_parser(
|
||||
"status", help="Show which proxy upstreams are ready"
|
||||
)
|
||||
proxy_subparsers.add_parser(
|
||||
"providers", help="List available proxy upstream providers"
|
||||
)
|
||||
proxy_parser.set_defaults(func=cmd_proxy)
|
||||
gateway_parser.set_defaults(func=cmd_gateway)
|
||||
|
||||
# =========================================================================
|
||||
|
|
|
|||
20
hermes_cli/proxy/__init__.py
Normal file
20
hermes_cli/proxy/__init__.py
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
"""Local OpenAI-compatible proxy that forwards to OAuth-authenticated upstreams.
|
||||
|
||||
Lets external apps (OpenViking, Karakeep, Open WebUI, ...) ride the user's
|
||||
already-logged-in provider subscription instead of needing a static API key
|
||||
copy-pasted into each app's config.
|
||||
|
||||
The proxy listens on ``127.0.0.1:<port>``, accepts any bearer (the client's
|
||||
``Authorization`` header is discarded), and attaches the user's real
|
||||
upstream credential to the forwarded request. The credential is refreshed
|
||||
automatically when it approaches expiry.
|
||||
|
||||
First-class adapter:
|
||||
- ``nous`` — Nous Portal (https://inference-api.nousresearch.com/v1)
|
||||
|
||||
Future adapters can plug in by implementing ``UpstreamAdapter``.
|
||||
"""
|
||||
|
||||
from hermes_cli.proxy.adapters.base import UpstreamAdapter
|
||||
|
||||
__all__ = ["UpstreamAdapter"]
|
||||
35
hermes_cli/proxy/adapters/__init__.py
Normal file
35
hermes_cli/proxy/adapters/__init__.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
"""Upstream adapter registry for the local proxy server.
|
||||
|
||||
Each adapter wraps a provider's OAuth state and exposes a uniform interface
|
||||
the proxy server can use to forward requests with a freshly-minted bearer
|
||||
token. See :class:`UpstreamAdapter` for the contract.
|
||||
"""
|
||||
|
||||
from typing import Dict, Type
|
||||
|
||||
from hermes_cli.proxy.adapters.base import UpstreamAdapter
|
||||
from hermes_cli.proxy.adapters.nous_portal import NousPortalAdapter
|
||||
|
||||
# Registry of available adapter classes keyed by provider name as used on
|
||||
# the ``hermes proxy start --provider <name>`` CLI flag.
|
||||
ADAPTERS: Dict[str, Type[UpstreamAdapter]] = {
|
||||
"nous": NousPortalAdapter,
|
||||
}
|
||||
|
||||
|
||||
def get_adapter(name: str) -> UpstreamAdapter:
|
||||
"""Instantiate an adapter by provider name.
|
||||
|
||||
Raises:
|
||||
ValueError: if ``name`` is not a registered adapter.
|
||||
"""
|
||||
key = (name or "").strip().lower()
|
||||
if key not in ADAPTERS:
|
||||
available = ", ".join(sorted(ADAPTERS)) or "(none)"
|
||||
raise ValueError(
|
||||
f"Unknown proxy upstream provider: {name!r}. Available: {available}"
|
||||
)
|
||||
return ADAPTERS[key]()
|
||||
|
||||
|
||||
__all__ = ["UpstreamAdapter", "ADAPTERS", "get_adapter"]
|
||||
94
hermes_cli/proxy/adapters/base.py
Normal file
94
hermes_cli/proxy/adapters/base.py
Normal file
|
|
@ -0,0 +1,94 @@
|
|||
"""Abstract base for proxy upstream adapters.
|
||||
|
||||
An :class:`UpstreamAdapter` represents one OAuth-authenticated provider the
|
||||
local proxy can forward requests to. The adapter is responsible for:
|
||||
|
||||
- locating the user's auth state for that provider
|
||||
- refreshing/minting credentials when needed
|
||||
- reporting the resolved upstream base URL
|
||||
- declaring which request paths it accepts
|
||||
|
||||
The proxy server is otherwise provider-agnostic.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import FrozenSet, Optional
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class UpstreamCredential:
|
||||
"""A resolved bearer + base URL ready to forward to."""
|
||||
|
||||
bearer: str
|
||||
"""Authorization header value to send upstream (token only, no ``Bearer`` prefix)."""
|
||||
|
||||
base_url: str
|
||||
"""Upstream base URL, e.g. ``https://inference-api.nousresearch.com/v1``."""
|
||||
|
||||
token_type: str = "Bearer"
|
||||
"""Auth scheme — currently always ``Bearer`` for supported providers."""
|
||||
|
||||
expires_at: Optional[str] = None
|
||||
"""ISO-8601 expiry timestamp for the bearer, when known. Informational."""
|
||||
|
||||
|
||||
class UpstreamAdapter(ABC):
|
||||
"""Contract for an upstream provider the proxy can forward to."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def name(self) -> str:
|
||||
"""Adapter key used on the CLI (e.g. ``"nous"``)."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def display_name(self) -> str:
|
||||
"""Human-readable provider name for logs and ``proxy status``."""
|
||||
|
||||
@property
|
||||
@abstractmethod
|
||||
def allowed_paths(self) -> FrozenSet[str]:
|
||||
"""Set of relative request paths the upstream accepts.
|
||||
|
||||
Paths are relative to the proxy's ``/v1`` mount point. For example,
|
||||
``"/chat/completions"`` corresponds to a client request to
|
||||
``http://127.0.0.1:<port>/v1/chat/completions``. Requests to paths
|
||||
not in this set get a 404 with a helpful error body.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def is_authenticated(self) -> bool:
|
||||
"""Return True if the user has usable credentials for this upstream.
|
||||
|
||||
Should be cheap — no network calls. Used by ``proxy start`` for a
|
||||
clear up-front error before binding a port.
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def get_credential(self) -> UpstreamCredential:
|
||||
"""Return a fresh credential, refreshing/minting if necessary.
|
||||
|
||||
Implementations should:
|
||||
- refresh the access token if it's near expiry
|
||||
- mint/rotate the upstream bearer key if it's near expiry
|
||||
- persist any refreshed state back to disk
|
||||
|
||||
Raises:
|
||||
RuntimeError: if the user isn't authenticated or the upstream
|
||||
refresh fails. The proxy will return 401 to the client.
|
||||
"""
|
||||
|
||||
def describe(self) -> str:
|
||||
"""One-line status summary for ``proxy status``."""
|
||||
try:
|
||||
cred = self.get_credential()
|
||||
except Exception as exc: # pragma: no cover - defensive
|
||||
return f"{self.display_name}: not ready ({exc})"
|
||||
ttl = f" (expires {cred.expires_at})" if cred.expires_at else ""
|
||||
return f"{self.display_name}: {cred.base_url}{ttl}"
|
||||
|
||||
|
||||
__all__ = ["UpstreamAdapter", "UpstreamCredential"]
|
||||
137
hermes_cli/proxy/adapters/nous_portal.py
Normal file
137
hermes_cli/proxy/adapters/nous_portal.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
"""Nous Portal upstream adapter.
|
||||
|
||||
Reads the user's Nous OAuth state from ``~/.hermes/auth.json``, refreshes
|
||||
the access token and mints a fresh agent key when needed, and exposes the
|
||||
upstream base URL plus minted bearer for the proxy server to forward to.
|
||||
|
||||
The minted ``agent_key`` (not the OAuth ``access_token``) is what
|
||||
``inference-api.nousresearch.com`` accepts as a bearer. The refresh helper
|
||||
already handles both — see :func:`hermes_cli.auth.refresh_nous_oauth_from_state`.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import threading
|
||||
from typing import Any, Dict, FrozenSet, Optional
|
||||
|
||||
from hermes_cli.auth import (
|
||||
DEFAULT_NOUS_INFERENCE_URL,
|
||||
_load_auth_store,
|
||||
_save_auth_store,
|
||||
_write_shared_nous_state,
|
||||
refresh_nous_oauth_from_state,
|
||||
)
|
||||
from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Endpoints inference-api.nousresearch.com actually serves. Anything else
|
||||
# the proxy will reject with 404 — keeps stray clients from leaking weird
|
||||
# requests to the upstream.
|
||||
_ALLOWED_PATHS: FrozenSet[str] = frozenset(
|
||||
{
|
||||
"/chat/completions",
|
||||
"/completions",
|
||||
"/embeddings",
|
||||
"/models",
|
||||
}
|
||||
)
|
||||
|
||||
|
||||
class NousPortalAdapter(UpstreamAdapter):
|
||||
"""Proxy upstream for the Nous Portal inference API."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
# Lock guards _load → refresh → _save against parallel proxy requests
|
||||
# racing to refresh expired tokens. Refresh itself is HTTP, so we
|
||||
# hold the lock across the network call (brief; OAuth refresh is fast).
|
||||
self._lock = threading.Lock()
|
||||
|
||||
@property
|
||||
def name(self) -> str:
|
||||
return "nous"
|
||||
|
||||
@property
|
||||
def display_name(self) -> str:
|
||||
return "Nous Portal"
|
||||
|
||||
@property
|
||||
def allowed_paths(self) -> FrozenSet[str]:
|
||||
return _ALLOWED_PATHS
|
||||
|
||||
def is_authenticated(self) -> bool:
|
||||
state = self._read_state()
|
||||
if state is None:
|
||||
return False
|
||||
# We need either a usable agent_key OR (refresh_token + access_token)
|
||||
# to recover. The refresh helper will mint/refresh as needed.
|
||||
return bool(
|
||||
state.get("agent_key")
|
||||
or (state.get("refresh_token") and state.get("access_token"))
|
||||
)
|
||||
|
||||
def get_credential(self) -> UpstreamCredential:
|
||||
with self._lock:
|
||||
state = self._read_state()
|
||||
if state is None:
|
||||
raise RuntimeError(
|
||||
"Not logged into Nous Portal. Run `hermes login nous` first."
|
||||
)
|
||||
|
||||
try:
|
||||
refreshed = refresh_nous_oauth_from_state(state)
|
||||
except Exception as exc:
|
||||
raise RuntimeError(
|
||||
f"Failed to refresh Nous Portal credentials: {exc}"
|
||||
) from exc
|
||||
|
||||
self._save_state(refreshed)
|
||||
|
||||
agent_key = refreshed.get("agent_key")
|
||||
if not agent_key:
|
||||
raise RuntimeError(
|
||||
"Nous Portal refresh did not return a usable agent_key. "
|
||||
"Try `hermes login nous` to re-authenticate."
|
||||
)
|
||||
|
||||
base_url = refreshed.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL
|
||||
base_url = base_url.rstrip("/")
|
||||
|
||||
return UpstreamCredential(
|
||||
bearer=agent_key,
|
||||
base_url=base_url,
|
||||
expires_at=refreshed.get("agent_key_expires_at"),
|
||||
)
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Internal helpers — auth.json access. Kept local rather than added
|
||||
# to hermes_cli.auth to avoid expanding that module's public surface.
|
||||
# ------------------------------------------------------------------
|
||||
|
||||
def _read_state(self) -> Optional[Dict[str, Any]]:
|
||||
try:
|
||||
store = _load_auth_store()
|
||||
except Exception as exc:
|
||||
logger.warning("proxy: failed to load auth store: %s", exc)
|
||||
return None
|
||||
providers = store.get("providers") or {}
|
||||
state = providers.get("nous")
|
||||
if not isinstance(state, dict):
|
||||
return None
|
||||
return dict(state) # copy so the refresh helper can mutate freely
|
||||
|
||||
def _save_state(self, state: Dict[str, Any]) -> None:
|
||||
try:
|
||||
store = _load_auth_store()
|
||||
providers = store.setdefault("providers", {})
|
||||
providers["nous"] = state
|
||||
_save_auth_store(store)
|
||||
_write_shared_nous_state(state)
|
||||
except Exception as exc:
|
||||
# Best effort — we still return the fresh credential. The next
|
||||
# request just won't see cached state, which means another refresh.
|
||||
logger.warning("proxy: failed to persist refreshed Nous state: %s", exc)
|
||||
|
||||
|
||||
__all__ = ["NousPortalAdapter"]
|
||||
141
hermes_cli/proxy/cli.py
Normal file
141
hermes_cli/proxy/cli.py
Normal file
|
|
@ -0,0 +1,141 @@
|
|||
"""CLI handlers for the ``hermes proxy`` subcommand."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from hermes_cli.proxy.adapters import ADAPTERS, get_adapter
|
||||
from hermes_cli.proxy.server import (
|
||||
AIOHTTP_AVAILABLE,
|
||||
DEFAULT_HOST,
|
||||
DEFAULT_PORT,
|
||||
run_server,
|
||||
)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def _print_aiohttp_missing() -> None:
|
||||
print(
|
||||
"hermes proxy requires aiohttp. Install one of:\n"
|
||||
" pip install 'hermes-agent[messaging]'\n"
|
||||
" pip install aiohttp",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
|
||||
def cmd_proxy_start(args: Any) -> int:
|
||||
"""Run the proxy server in the foreground.
|
||||
|
||||
Returns process exit code (0 on clean shutdown).
|
||||
"""
|
||||
if not AIOHTTP_AVAILABLE:
|
||||
_print_aiohttp_missing()
|
||||
return 1
|
||||
|
||||
provider = getattr(args, "provider", None) or "nous"
|
||||
try:
|
||||
adapter = get_adapter(provider)
|
||||
except ValueError as exc:
|
||||
print(f"Error: {exc}", file=sys.stderr)
|
||||
return 2
|
||||
|
||||
if not adapter.is_authenticated():
|
||||
print(
|
||||
f"Not logged into {adapter.display_name}. "
|
||||
f"Run `hermes login {adapter.name}` first.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 2
|
||||
|
||||
host = getattr(args, "host", None) or DEFAULT_HOST
|
||||
port = getattr(args, "port", None) or DEFAULT_PORT
|
||||
|
||||
print(
|
||||
f"Starting Hermes proxy for {adapter.display_name}\n"
|
||||
f" Listening on: http://{host}:{port}/v1\n"
|
||||
f" Forwarding to: (resolved per-request from your subscription)\n"
|
||||
f" Use any bearer token in the client — the proxy attaches your real credential.\n"
|
||||
f"\n"
|
||||
f"Press Ctrl+C to stop.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
try:
|
||||
asyncio.run(run_server(adapter, host=host, port=port))
|
||||
except KeyboardInterrupt:
|
||||
print("\nproxy: stopped", file=sys.stderr)
|
||||
except OSError as exc:
|
||||
print(f"proxy: failed to bind {host}:{port}: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_proxy_status(args: Any) -> int:
|
||||
"""Print the status of each configured upstream adapter."""
|
||||
print("Hermes proxy upstream adapters\n")
|
||||
for name in sorted(ADAPTERS):
|
||||
adapter = get_adapter(name)
|
||||
if not adapter.is_authenticated():
|
||||
print(f" [{name:8s}] {adapter.display_name} — not logged in")
|
||||
continue
|
||||
try:
|
||||
cred = adapter.get_credential()
|
||||
except Exception as exc:
|
||||
print(
|
||||
f" [{name:8s}] {adapter.display_name} — credentials need attention "
|
||||
f"({exc})"
|
||||
)
|
||||
continue
|
||||
expires = f" (bearer expires {cred.expires_at})" if cred.expires_at else ""
|
||||
print(f" [{name:8s}] {adapter.display_name} — ready{expires}")
|
||||
print(
|
||||
"\nStart the proxy with: hermes proxy start [--provider <name>]"
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_proxy_list_providers(args: Any) -> int:
|
||||
"""List available proxy upstream providers."""
|
||||
print("Available proxy upstream providers:")
|
||||
for name in sorted(ADAPTERS):
|
||||
adapter = get_adapter(name)
|
||||
print(f" {name} — {adapter.display_name}")
|
||||
return 0
|
||||
|
||||
|
||||
def cmd_proxy(args: Any) -> int:
|
||||
"""Dispatch ``hermes proxy <subcommand>``."""
|
||||
sub = getattr(args, "proxy_command", None)
|
||||
if sub == "start":
|
||||
return cmd_proxy_start(args)
|
||||
if sub == "status":
|
||||
return cmd_proxy_status(args)
|
||||
if sub in ("providers", "list"):
|
||||
return cmd_proxy_list_providers(args)
|
||||
# No subcommand → print short help.
|
||||
print(
|
||||
"hermes proxy — local OpenAI-compatible proxy that attaches your\n"
|
||||
"OAuth-authenticated provider credentials to outbound requests.\n"
|
||||
"\n"
|
||||
"Subcommands:\n"
|
||||
" hermes proxy start [--provider nous] [--host 127.0.0.1] [--port 8645]\n"
|
||||
" Run the proxy in the foreground.\n"
|
||||
" hermes proxy status\n"
|
||||
" Show which upstream adapters are ready.\n"
|
||||
" hermes proxy providers\n"
|
||||
" List available upstream providers.\n",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
__all__ = [
|
||||
"cmd_proxy",
|
||||
"cmd_proxy_start",
|
||||
"cmd_proxy_status",
|
||||
"cmd_proxy_list_providers",
|
||||
]
|
||||
265
hermes_cli/proxy/server.py
Normal file
265
hermes_cli/proxy/server.py
Normal file
|
|
@ -0,0 +1,265 @@
|
|||
"""HTTP server that forwards OpenAI-compatible requests to a configured upstream.
|
||||
|
||||
Listens on ``http://<host>:<port>/v1/<path>`` and forwards each request to
|
||||
``<upstream-base-url>/<path>`` with the client's ``Authorization`` header
|
||||
replaced by a freshly-resolved bearer from the configured adapter. The
|
||||
response is streamed back unmodified, preserving SSE.
|
||||
|
||||
The server is intentionally minimal: it does NOT mediate, log, transform,
|
||||
or rewrite request/response bodies. It's a credential-attaching forwarder.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import signal
|
||||
from typing import Optional
|
||||
|
||||
try:
|
||||
import aiohttp
|
||||
from aiohttp import web
|
||||
AIOHTTP_AVAILABLE = True
|
||||
except ImportError:
|
||||
aiohttp = None # type: ignore[assignment]
|
||||
web = None # type: ignore[assignment]
|
||||
AIOHTTP_AVAILABLE = False
|
||||
|
||||
from hermes_cli.proxy.adapters.base import UpstreamAdapter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# Headers we strip when forwarding to the upstream. ``host``/``content-length``
|
||||
# are recomputed by aiohttp; ``authorization`` is replaced with our bearer.
|
||||
# Everything else (content-type, accept, user-agent, x-* headers) passes through.
|
||||
_HOP_BY_HOP_HEADERS = frozenset(
|
||||
{
|
||||
"host",
|
||||
"content-length",
|
||||
"connection",
|
||||
"keep-alive",
|
||||
"proxy-authenticate",
|
||||
"proxy-authorization",
|
||||
"te",
|
||||
"trailers",
|
||||
"transfer-encoding",
|
||||
"upgrade",
|
||||
"authorization", # we replace this one
|
||||
}
|
||||
)
|
||||
|
||||
DEFAULT_PORT = 8645
|
||||
DEFAULT_HOST = "127.0.0.1"
|
||||
|
||||
|
||||
def _json_error(status: int, message: str, code: str = "proxy_error") -> "web.Response":
|
||||
"""Return an OpenAI-style error JSON response."""
|
||||
body = {"error": {"message": message, "type": code, "code": code}}
|
||||
return web.json_response(body, status=status)
|
||||
|
||||
|
||||
def _filter_request_headers(headers: "aiohttp.typedefs.LooseHeaders") -> dict:
|
||||
"""Strip hop-by-hop + auth headers from the inbound request."""
|
||||
out = {}
|
||||
for key, value in headers.items():
|
||||
if key.lower() in _HOP_BY_HOP_HEADERS:
|
||||
continue
|
||||
out[key] = value
|
||||
return out
|
||||
|
||||
|
||||
def _filter_response_headers(headers) -> dict:
|
||||
"""Strip hop-by-hop headers from the upstream response."""
|
||||
out = {}
|
||||
for key, value in headers.items():
|
||||
if key.lower() in _HOP_BY_HOP_HEADERS:
|
||||
continue
|
||||
# aiohttp recomputes Content-Encoding/Content-Length on stream — let it.
|
||||
if key.lower() in ("content-encoding", "content-length"):
|
||||
continue
|
||||
out[key] = value
|
||||
return out
|
||||
|
||||
|
||||
def create_app(adapter: UpstreamAdapter) -> "web.Application":
|
||||
"""Build the aiohttp application bound to a specific upstream adapter."""
|
||||
if not AIOHTTP_AVAILABLE:
|
||||
raise RuntimeError(
|
||||
"aiohttp is required for `hermes proxy`. Install with: "
|
||||
"pip install 'hermes-agent[messaging]' or `pip install aiohttp`."
|
||||
)
|
||||
|
||||
app = web.Application()
|
||||
# AppKey ensures forward-compat with future aiohttp versions that strip
|
||||
# bare-string keys.
|
||||
_adapter_key = web.AppKey("adapter", UpstreamAdapter)
|
||||
app[_adapter_key] = adapter
|
||||
|
||||
async def handle_health(request: "web.Request") -> "web.Response":
|
||||
return web.json_response(
|
||||
{
|
||||
"status": "ok",
|
||||
"upstream": adapter.display_name,
|
||||
"authenticated": adapter.is_authenticated(),
|
||||
}
|
||||
)
|
||||
|
||||
async def handle_models_fallback(request: "web.Request") -> "web.Response":
|
||||
# Most clients hit /v1/models on startup. If the upstream doesn't
|
||||
# serve /models, synthesize a minimal response so clients don't
|
||||
# crash. The actual forwarding path handles /models when allowed.
|
||||
return web.json_response(
|
||||
{
|
||||
"object": "list",
|
||||
"data": [],
|
||||
}
|
||||
)
|
||||
|
||||
async def handle_proxy(request: "web.Request") -> "web.StreamResponse":
|
||||
# Extract the path *after* /v1
|
||||
rel_path = request.match_info.get("tail", "")
|
||||
rel_path = "/" + rel_path.lstrip("/")
|
||||
|
||||
if rel_path not in adapter.allowed_paths:
|
||||
allowed = ", ".join(sorted(adapter.allowed_paths))
|
||||
return _json_error(
|
||||
404,
|
||||
f"Path /v1{rel_path} is not forwarded by this proxy. "
|
||||
f"Allowed: {allowed}",
|
||||
code="path_not_allowed",
|
||||
)
|
||||
|
||||
try:
|
||||
cred = adapter.get_credential()
|
||||
except Exception as exc:
|
||||
logger.warning("proxy: credential resolution failed: %s", exc)
|
||||
return _json_error(401, str(exc), code="upstream_auth_failed")
|
||||
|
||||
upstream_url = f"{cred.base_url.rstrip('/')}{rel_path}"
|
||||
# Preserve query string verbatim.
|
||||
if request.query_string:
|
||||
upstream_url = f"{upstream_url}?{request.query_string}"
|
||||
|
||||
# Forward body verbatim. Read into memory once — request bodies for
|
||||
# chat/completions/embeddings are small (<1MB typically). If we ever
|
||||
# need to forward large multipart uploads we'll switch to streaming
|
||||
# the request body too.
|
||||
body = await request.read()
|
||||
|
||||
fwd_headers = _filter_request_headers(request.headers)
|
||||
fwd_headers["Authorization"] = f"{cred.token_type} {cred.bearer}"
|
||||
|
||||
logger.debug(
|
||||
"proxy: forwarding %s %s -> %s (body=%d bytes)",
|
||||
request.method, rel_path, upstream_url, len(body),
|
||||
)
|
||||
|
||||
# Use a per-request session so connection state doesn't leak between
|
||||
# clients. Could be optimized to a shared session later.
|
||||
timeout = aiohttp.ClientTimeout(total=None, sock_connect=15, sock_read=300)
|
||||
try:
|
||||
session = aiohttp.ClientSession(timeout=timeout)
|
||||
except Exception as exc: # pragma: no cover - aiohttp setup issue
|
||||
return _json_error(500, f"proxy session init failed: {exc}")
|
||||
|
||||
try:
|
||||
upstream_resp = await session.request(
|
||||
request.method,
|
||||
upstream_url,
|
||||
data=body if body else None,
|
||||
headers=fwd_headers,
|
||||
allow_redirects=False,
|
||||
)
|
||||
except aiohttp.ClientError as exc:
|
||||
await session.close()
|
||||
logger.warning("proxy: upstream connection failed: %s", exc)
|
||||
return _json_error(502, f"upstream connection failed: {exc}",
|
||||
code="upstream_unreachable")
|
||||
except asyncio.TimeoutError:
|
||||
await session.close()
|
||||
return _json_error(504, "upstream request timed out",
|
||||
code="upstream_timeout")
|
||||
|
||||
# Stream response back. Headers first, then chunked body.
|
||||
resp = web.StreamResponse(
|
||||
status=upstream_resp.status,
|
||||
headers=_filter_response_headers(upstream_resp.headers),
|
||||
)
|
||||
await resp.prepare(request)
|
||||
|
||||
try:
|
||||
async for chunk in upstream_resp.content.iter_any():
|
||||
if chunk:
|
||||
await resp.write(chunk)
|
||||
except (aiohttp.ClientError, asyncio.CancelledError) as exc:
|
||||
logger.warning("proxy: streaming interrupted: %s", exc)
|
||||
finally:
|
||||
upstream_resp.release()
|
||||
await session.close()
|
||||
|
||||
await resp.write_eof()
|
||||
return resp
|
||||
|
||||
# /health doesn't go through the upstream
|
||||
app.router.add_get("/health", handle_health)
|
||||
# Catch-all under /v1 — forwards if the path is allowed.
|
||||
app.router.add_route("*", "/v1/{tail:.*}", handle_proxy)
|
||||
|
||||
return app
|
||||
|
||||
|
||||
async def run_server(
|
||||
adapter: UpstreamAdapter,
|
||||
host: str = DEFAULT_HOST,
|
||||
port: int = DEFAULT_PORT,
|
||||
shutdown_event: Optional[asyncio.Event] = None,
|
||||
) -> None:
|
||||
"""Run the proxy in the current event loop until shutdown_event is set.
|
||||
|
||||
If shutdown_event is None, runs until cancelled (Ctrl+C or SIGTERM).
|
||||
"""
|
||||
if not AIOHTTP_AVAILABLE:
|
||||
raise RuntimeError(
|
||||
"aiohttp is required for `hermes proxy`. Install with: "
|
||||
"pip install 'hermes-agent[messaging]' or `pip install aiohttp`."
|
||||
)
|
||||
|
||||
app = create_app(adapter)
|
||||
runner = web.AppRunner(app, access_log=None)
|
||||
await runner.setup()
|
||||
site = web.TCPSite(runner, host=host, port=port)
|
||||
await site.start()
|
||||
|
||||
logger.info(
|
||||
"proxy: listening on http://%s:%d/v1 -> %s",
|
||||
host, port, adapter.display_name,
|
||||
)
|
||||
|
||||
stop_event = shutdown_event or asyncio.Event()
|
||||
|
||||
# Wire signal handlers when we own the loop's lifetime.
|
||||
if shutdown_event is None:
|
||||
loop = asyncio.get_running_loop()
|
||||
for sig in (signal.SIGINT, signal.SIGTERM):
|
||||
try:
|
||||
loop.add_signal_handler(sig, stop_event.set)
|
||||
except NotImplementedError:
|
||||
# Windows / restricted environments — Ctrl+C will still
|
||||
# raise KeyboardInterrupt and unwind us.
|
||||
pass
|
||||
|
||||
try:
|
||||
await stop_event.wait()
|
||||
finally:
|
||||
logger.info("proxy: shutting down")
|
||||
await runner.cleanup()
|
||||
|
||||
|
||||
__all__ = [
|
||||
"create_app",
|
||||
"run_server",
|
||||
"DEFAULT_HOST",
|
||||
"DEFAULT_PORT",
|
||||
"AIOHTTP_AVAILABLE",
|
||||
]
|
||||
Loading…
Add table
Add a link
Reference in a new issue