mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-05-18 04:41:56 +00:00
feat(proxy): local OpenAI-compatible proxy for OAuth providers (#25969)
Adds 'hermes proxy start' — a local HTTP server that lets external apps (OpenViking, Karakeep, Open WebUI, ...) use a Hermes-managed provider subscription as their LLM endpoint. The proxy attaches the user's real OAuth-resolved credentials to each forwarded request, refreshing them automatically; the client can send any bearer (it gets stripped).

Ships with one adapter — Nous Portal. The UpstreamAdapter ABC and registry in hermes_cli/proxy/adapters/ are designed so that additional OAuth providers can plug in by name without server changes.

Commands:
- hermes proxy start [--provider nous] [--host 127.0.0.1] [--port 8645]
- hermes proxy status
- hermes proxy providers

Allowed Portal paths: /v1/chat/completions, /v1/completions, /v1/embeddings, /v1/models. Anything else returns 404 with a clear error pointing at the allowed list.

aiohttp is gated like gateway/platforms/api_server.py (try-import, clean runtime error if missing). No new core dependency.

Tests: 24 unit tests + 1 separate E2E that spawns the real subprocess and verifies the upstream receives the right bearer with the client's header stripped.
This commit is contained in:
parent
34fc94d1f4
commit
ccb5aae0d2
11 changed files with 1466 additions and 1 deletions
|
|
@ -1452,6 +1452,17 @@ def cmd_gateway(args):
|
||||||
gateway_command(args)
|
gateway_command(args)
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_proxy(args):
    """Entry point for ``hermes proxy``: delegate to the proxy CLI dispatcher.

    A non-zero integer result from the dispatcher becomes the process exit
    status via ``SystemExit``; any other result returns normally.
    """
    # Deferred import: the proxy package pulls in aiohttp, which sits behind
    # an extras install for users who run neither the proxy nor the
    # messaging gateway.
    from hermes_cli.proxy.cli import cmd_proxy as dispatch

    exit_code = dispatch(args)
    if not isinstance(exit_code, int) or exit_code == 0:
        return
    raise SystemExit(exit_code)
|
||||||
|
|
||||||
|
|
||||||
def cmd_whatsapp(args):
|
def cmd_whatsapp(args):
|
||||||
"""Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
|
"""Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
|
||||||
_require_tty("whatsapp")
|
_require_tty("whatsapp")
|
||||||
|
|
@ -9385,7 +9396,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
|
||||||
"config", "cron", "curator", "dashboard", "debug", "doctor",
|
"config", "cron", "curator", "dashboard", "debug", "doctor",
|
||||||
"dump", "fallback", "gateway", "hooks", "import", "insights",
|
"dump", "fallback", "gateway", "hooks", "import", "insights",
|
||||||
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
|
"kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
|
||||||
"model", "pairing", "plugins", "profile", "sessions", "setup",
|
"model", "pairing", "plugins", "profile", "proxy", "sessions", "setup",
|
||||||
"skills", "slack", "status", "tools", "uninstall", "update",
|
"skills", "slack", "status", "tools", "uninstall", "update",
|
||||||
"version", "webhook", "whatsapp", "chat",
|
"version", "webhook", "whatsapp", "chat",
|
||||||
# Help-ish invocations — plugin commands not being listed in
|
# Help-ish invocations — plugin commands not being listed in
|
||||||
|
|
@ -9727,6 +9738,51 @@ def main():
|
||||||
help="Skip the confirmation prompt",
|
help="Skip the confirmation prompt",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# proxy command — local OpenAI-compatible proxy that attaches the user's
|
||||||
|
# OAuth-authenticated provider credentials to outbound requests. Lets
|
||||||
|
# external apps (OpenViking, Karakeep, Open WebUI, ...) ride a logged-in
|
||||||
|
# subscription without copy-pasting static API keys.
|
||||||
|
# =========================================================================
|
||||||
|
proxy_parser = subparsers.add_parser(
|
||||||
|
"proxy",
|
||||||
|
help="Local OpenAI-compatible proxy to OAuth providers",
|
||||||
|
description=(
|
||||||
|
"Run a local HTTP server that forwards OpenAI-compatible requests "
|
||||||
|
"to an OAuth-authenticated provider (e.g. Nous Portal). External "
|
||||||
|
"apps can point at the proxy with any bearer token; the proxy "
|
||||||
|
"attaches your real credentials."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
proxy_subparsers = proxy_parser.add_subparsers(dest="proxy_command")
|
||||||
|
|
||||||
|
proxy_start = proxy_subparsers.add_parser(
|
||||||
|
"start", help="Run the proxy in the foreground"
|
||||||
|
)
|
||||||
|
proxy_start.add_argument(
|
||||||
|
"--provider",
|
||||||
|
default="nous",
|
||||||
|
help="Upstream provider (default: nous). See `hermes proxy providers`.",
|
||||||
|
)
|
||||||
|
proxy_start.add_argument(
|
||||||
|
"--host",
|
||||||
|
default=None,
|
||||||
|
help="Bind address (default: 127.0.0.1). Use 0.0.0.0 to expose on LAN.",
|
||||||
|
)
|
||||||
|
proxy_start.add_argument(
|
||||||
|
"--port",
|
||||||
|
type=int,
|
||||||
|
default=None,
|
||||||
|
help="Bind port (default: 8645)",
|
||||||
|
)
|
||||||
|
|
||||||
|
proxy_subparsers.add_parser(
|
||||||
|
"status", help="Show which proxy upstreams are ready"
|
||||||
|
)
|
||||||
|
proxy_subparsers.add_parser(
|
||||||
|
"providers", help="List available proxy upstream providers"
|
||||||
|
)
|
||||||
|
proxy_parser.set_defaults(func=cmd_proxy)
|
||||||
gateway_parser.set_defaults(func=cmd_gateway)
|
gateway_parser.set_defaults(func=cmd_gateway)
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
|
||||||
20
hermes_cli/proxy/__init__.py
Normal file
20
hermes_cli/proxy/__init__.py
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
"""Local OpenAI-compatible proxy that forwards to OAuth-authenticated upstreams.
|
||||||
|
|
||||||
|
Lets external apps (OpenViking, Karakeep, Open WebUI, ...) ride the user's
|
||||||
|
already-logged-in provider subscription instead of needing a static API key
|
||||||
|
copy-pasted into each app's config.
|
||||||
|
|
||||||
|
The proxy listens on ``127.0.0.1:<port>``, accepts any bearer (the client's
|
||||||
|
``Authorization`` header is discarded), and attaches the user's real
|
||||||
|
upstream credential to the forwarded request. The credential is refreshed
|
||||||
|
automatically when it approaches expiry.
|
||||||
|
|
||||||
|
First-class adapter:
|
||||||
|
- ``nous`` — Nous Portal (https://inference-api.nousresearch.com/v1)
|
||||||
|
|
||||||
|
Future adapters can plug in by implementing ``UpstreamAdapter``.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from hermes_cli.proxy.adapters.base import UpstreamAdapter
|
||||||
|
|
||||||
|
__all__ = ["UpstreamAdapter"]
|
||||||
35
hermes_cli/proxy/adapters/__init__.py
Normal file
35
hermes_cli/proxy/adapters/__init__.py
Normal file
|
|
@ -0,0 +1,35 @@
|
||||||
|
"""Upstream adapter registry for the local proxy server.
|
||||||
|
|
||||||
|
Each adapter wraps a provider's OAuth state and exposes a uniform interface
|
||||||
|
the proxy server can use to forward requests with a freshly-minted bearer
|
||||||
|
token. See :class:`UpstreamAdapter` for the contract.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from typing import Dict, Type
|
||||||
|
|
||||||
|
from hermes_cli.proxy.adapters.base import UpstreamAdapter
|
||||||
|
from hermes_cli.proxy.adapters.nous_portal import NousPortalAdapter
|
||||||
|
|
||||||
|
# Registry of available adapter classes keyed by provider name as used on
|
||||||
|
# the ``hermes proxy start --provider <name>`` CLI flag.
|
||||||
|
ADAPTERS: Dict[str, Type[UpstreamAdapter]] = {
|
||||||
|
"nous": NousPortalAdapter,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def get_adapter(name: str) -> UpstreamAdapter:
    """Build a new adapter instance for the given provider name.

    The lookup ignores surrounding whitespace and letter case; ``None`` or
    an empty string is treated as an unknown provider.

    Raises:
        ValueError: if ``name`` does not match a key in ``ADAPTERS``. The
            message lists the registered provider names.
    """
    normalized = (name or "").strip().lower()
    if normalized in ADAPTERS:
        return ADAPTERS[normalized]()

    known = ", ".join(sorted(ADAPTERS))
    if not known:
        known = "(none)"
    raise ValueError(
        f"Unknown proxy upstream provider: {name!r}. Available: {known}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["UpstreamAdapter", "ADAPTERS", "get_adapter"]
|
||||||
94
hermes_cli/proxy/adapters/base.py
Normal file
94
hermes_cli/proxy/adapters/base.py
Normal file
|
|
@ -0,0 +1,94 @@
|
||||||
|
"""Abstract base for proxy upstream adapters.
|
||||||
|
|
||||||
|
An :class:`UpstreamAdapter` represents one OAuth-authenticated provider the
|
||||||
|
local proxy can forward requests to. The adapter is responsible for:
|
||||||
|
|
||||||
|
- locating the user's auth state for that provider
|
||||||
|
- refreshing/minting credentials when needed
|
||||||
|
- reporting the resolved upstream base URL
|
||||||
|
- declaring which request paths it accepts
|
||||||
|
|
||||||
|
The proxy server is otherwise provider-agnostic.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import FrozenSet, Optional
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(frozen=True)
class UpstreamCredential:
    """A resolved bearer + base URL ready to forward to.

    Instances are immutable. ``repr()`` masks the bearer so the secret does
    not end up in logs or tracebacks by accident; equality and field access
    are unaffected.
    """

    # Token sent upstream in the Authorization header (token only, no
    # ``Bearer`` prefix).
    bearer: str

    # Upstream base URL, e.g. ``https://inference-api.nousresearch.com/v1``.
    base_url: str

    # Auth scheme — currently always ``Bearer`` for supported providers.
    token_type: str = "Bearer"

    # ISO-8601 expiry timestamp for the bearer, when known. Informational.
    expires_at: Optional[str] = None

    def __repr__(self) -> str:
        # The dataclass-generated repr would print the raw bearer; an
        # explicitly defined __repr__ is kept by @dataclass instead.
        masked = "***" if self.bearer else ""
        return (
            f"{type(self).__name__}(bearer={masked!r}, "
            f"base_url={self.base_url!r}, "
            f"token_type={self.token_type!r}, "
            f"expires_at={self.expires_at!r})"
        )
|
||||||
|
|
||||||
|
|
||||||
|
class UpstreamAdapter(ABC):
    """Interface every proxy upstream provider has to implement."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Provider key as typed on the command line (e.g. ``"nous"``)."""

    @property
    @abstractmethod
    def display_name(self) -> str:
        """Provider name in human-readable form, for logs and ``proxy status``."""

    @property
    @abstractmethod
    def allowed_paths(self) -> FrozenSet[str]:
        """Relative request paths that may be forwarded to this upstream.

        Each path is relative to the proxy's ``/v1`` mount point, so
        ``"/chat/completions"`` matches a client request to
        ``http://127.0.0.1:<port>/v1/chat/completions``. A request whose
        path is missing from this set is answered with a 404 and an
        explanatory error body.
        """

    @abstractmethod
    def is_authenticated(self) -> bool:
        """Tell whether usable credentials exist for this upstream.

        Implementations should keep this cheap and avoid network calls:
        ``proxy start`` uses it to fail with a clear message before a port
        is bound.
        """

    @abstractmethod
    def get_credential(self) -> UpstreamCredential:
        """Produce a fresh credential, refreshing or minting when required.

        Implementations are expected to:
        - refresh the access token when it is close to expiry
        - mint/rotate the upstream bearer key when it is close to expiry
        - write any refreshed state back to disk

        Raises:
            RuntimeError: when the user is not authenticated or the
                upstream refresh fails. The proxy answers the client
                with a 401 in that case.
        """

    def describe(self) -> str:
        """Summarise this upstream in one line for ``proxy status``."""
        try:
            credential = self.get_credential()
        except Exception as exc:  # pragma: no cover - defensive
            return f"{self.display_name}: not ready ({exc})"

        if credential.expires_at:
            expiry_note = f" (expires {credential.expires_at})"
        else:
            expiry_note = ""
        return f"{self.display_name}: {credential.base_url}{expiry_note}"
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["UpstreamAdapter", "UpstreamCredential"]
|
||||||
137
hermes_cli/proxy/adapters/nous_portal.py
Normal file
137
hermes_cli/proxy/adapters/nous_portal.py
Normal file
|
|
@ -0,0 +1,137 @@
|
||||||
|
"""Nous Portal upstream adapter.
|
||||||
|
|
||||||
|
Reads the user's Nous OAuth state from ``~/.hermes/auth.json``, refreshes
|
||||||
|
the access token and mints a fresh agent key when needed, and exposes the
|
||||||
|
upstream base URL plus minted bearer for the proxy server to forward to.
|
||||||
|
|
||||||
|
The minted ``agent_key`` (not the OAuth ``access_token``) is what
|
||||||
|
``inference-api.nousresearch.com`` accepts as a bearer. The refresh helper
|
||||||
|
already handles both — see :func:`hermes_cli.auth.refresh_nous_oauth_from_state`.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import logging
|
||||||
|
import threading
|
||||||
|
from typing import Any, Dict, FrozenSet, Optional
|
||||||
|
|
||||||
|
from hermes_cli.auth import (
|
||||||
|
DEFAULT_NOUS_INFERENCE_URL,
|
||||||
|
_load_auth_store,
|
||||||
|
_save_auth_store,
|
||||||
|
_write_shared_nous_state,
|
||||||
|
refresh_nous_oauth_from_state,
|
||||||
|
)
|
||||||
|
from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Endpoints inference-api.nousresearch.com actually serves. Anything else
|
||||||
|
# the proxy will reject with 404 — keeps stray clients from leaking weird
|
||||||
|
# requests to the upstream.
|
||||||
|
_ALLOWED_PATHS: FrozenSet[str] = frozenset(
|
||||||
|
{
|
||||||
|
"/chat/completions",
|
||||||
|
"/completions",
|
||||||
|
"/embeddings",
|
||||||
|
"/models",
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class NousPortalAdapter(UpstreamAdapter):
    """Upstream adapter that forwards to the Nous Portal inference API."""

    def __init__(self) -> None:
        # Serialises the load -> refresh -> save sequence so parallel proxy
        # requests do not race each other refreshing expired tokens. The
        # lock stays held for the duration of the refresh call.
        self._lock = threading.Lock()

    @property
    def name(self) -> str:
        return "nous"

    @property
    def display_name(self) -> str:
        return "Nous Portal"

    @property
    def allowed_paths(self) -> FrozenSet[str]:
        return _ALLOWED_PATHS

    def is_authenticated(self) -> bool:
        """Report whether the stored Nous state looks usable.

        Usable means either an ``agent_key`` is present, or both a
        ``refresh_token`` and an ``access_token`` are present so that the
        refresh helper has something to recover from.
        """
        stored = self._read_state()
        if stored is None:
            return False
        if stored.get("agent_key"):
            return True
        return bool(stored.get("refresh_token") and stored.get("access_token"))

    def get_credential(self) -> UpstreamCredential:
        """Refresh the stored Nous state and return the resulting credential.

        Raises:
            RuntimeError: if no Nous state is stored, if the refresh helper
                raises, or if the refreshed state carries no ``agent_key``.
        """
        with self._lock:
            stored = self._read_state()
            if stored is None:
                raise RuntimeError(
                    "Not logged into Nous Portal. Run `hermes login nous` first."
                )

            try:
                fresh = refresh_nous_oauth_from_state(stored)
            except Exception as exc:
                raise RuntimeError(
                    f"Failed to refresh Nous Portal credentials: {exc}"
                ) from exc

            # Persistence is best effort; _save_state logs and swallows
            # its own failures.
            self._save_state(fresh)

            key = fresh.get("agent_key")
            if not key:
                raise RuntimeError(
                    "Nous Portal refresh did not return a usable agent_key. "
                    "Try `hermes login nous` to re-authenticate."
                )

            upstream = fresh.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL
            return UpstreamCredential(
                bearer=key,
                base_url=upstream.rstrip("/"),
                expires_at=fresh.get("agent_key_expires_at"),
            )

    # ------------------------------------------------------------------
    # Internal helpers — auth.json access. Kept local rather than added
    # to hermes_cli.auth to avoid expanding that module's public surface.
    # ------------------------------------------------------------------

    def _read_state(self) -> Optional[Dict[str, Any]]:
        """Return a copy of the stored ``nous`` provider state, or ``None``.

        ``None`` covers both a failed auth-store load (logged as a warning)
        and a missing or non-dict ``nous`` entry.
        """
        try:
            store = _load_auth_store()
        except Exception as exc:
            logger.warning("proxy: failed to load auth store: %s", exc)
            return None

        entry = (store.get("providers") or {}).get("nous")
        # Hand back a copy so the refresh helper can mutate it freely.
        return dict(entry) if isinstance(entry, dict) else None

    def _save_state(self, state: Dict[str, Any]) -> None:
        """Write ``state`` back as the ``nous`` provider entry (best effort)."""
        try:
            store = _load_auth_store()
            store.setdefault("providers", {})["nous"] = state
            _save_auth_store(store)
            _write_shared_nous_state(state)
        except Exception as exc:
            # Failure here is not fatal: the caller still returns the fresh
            # credential, and the next request simply refreshes again.
            logger.warning("proxy: failed to persist refreshed Nous state: %s", exc)
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = ["NousPortalAdapter"]
|
||||||
141
hermes_cli/proxy/cli.py
Normal file
141
hermes_cli/proxy/cli.py
Normal file
|
|
@ -0,0 +1,141 @@
|
||||||
|
"""CLI handlers for the ``hermes proxy`` subcommand."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import logging
|
||||||
|
import sys
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from hermes_cli.proxy.adapters import ADAPTERS, get_adapter
|
||||||
|
from hermes_cli.proxy.server import (
|
||||||
|
AIOHTTP_AVAILABLE,
|
||||||
|
DEFAULT_HOST,
|
||||||
|
DEFAULT_PORT,
|
||||||
|
run_server,
|
||||||
|
)
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
|
def _print_aiohttp_missing() -> None:
|
||||||
|
print(
|
||||||
|
"hermes proxy requires aiohttp. Install one of:\n"
|
||||||
|
" pip install 'hermes-agent[messaging]'\n"
|
||||||
|
" pip install aiohttp",
|
||||||
|
file=sys.stderr,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_proxy_start(args: Any) -> int:
    """Start the proxy server and block in the foreground until it stops.

    Reads ``provider``, ``host`` and ``port`` from ``args``; a missing or
    falsy value falls back to ``"nous"``, ``DEFAULT_HOST`` and
    ``DEFAULT_PORT`` respectively.

    Returns:
        Process exit code: 0 on clean shutdown, 1 when aiohttp is missing
        or the server raised ``OSError``, 2 when the provider is unknown
        or the user is not logged in.
    """
    if not AIOHTTP_AVAILABLE:
        _print_aiohttp_missing()
        return 1

    requested = getattr(args, "provider", None) or "nous"
    try:
        adapter = get_adapter(requested)
    except ValueError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 2

    # Fail with a clear message before a port is bound.
    if not adapter.is_authenticated():
        login_hint = (
            f"Not logged into {adapter.display_name}. "
            f"Run `hermes login {adapter.name}` first."
        )
        print(login_hint, file=sys.stderr)
        return 2

    host = getattr(args, "host", None) or DEFAULT_HOST
    port = getattr(args, "port", None) or DEFAULT_PORT

    banner = (
        f"Starting Hermes proxy for {adapter.display_name}\n"
        f" Listening on: http://{host}:{port}/v1\n"
        " Forwarding to: (resolved per-request from your subscription)\n"
        " Use any bearer token in the client — the proxy attaches your real credential.\n"
        "\n"
        "Press Ctrl+C to stop."
    )
    print(banner, file=sys.stderr)

    exit_code = 0
    try:
        asyncio.run(run_server(adapter, host=host, port=port))
    except KeyboardInterrupt:
        print("\nproxy: stopped", file=sys.stderr)
    except OSError as exc:
        print(f"proxy: failed to bind {host}:{port}: {exc}", file=sys.stderr)
        exit_code = 1
    return exit_code
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_proxy_status(args: Any) -> int:
    """Print one status line per registered upstream adapter.

    Each adapter is reported as not logged in, as needing attention (when
    resolving its credential raises), or as ready. Always returns 0.
    """
    print("Hermes proxy upstream adapters\n")
    for key in sorted(ADAPTERS):
        upstream = get_adapter(key)
        if upstream.is_authenticated():
            try:
                credential = upstream.get_credential()
            except Exception as exc:
                print(
                    f" [{key:8s}] {upstream.display_name} — credentials need attention "
                    f"({exc})"
                )
            else:
                if credential.expires_at:
                    suffix = f" (bearer expires {credential.expires_at})"
                else:
                    suffix = ""
                print(f" [{key:8s}] {upstream.display_name} — ready{suffix}")
        else:
            print(f" [{key:8s}] {upstream.display_name} — not logged in")

    print("\nStart the proxy with: hermes proxy start [--provider <name>]")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_proxy_list_providers(args: Any) -> int:
    """Print each registered provider key with its display name; return 0."""
    print("Available proxy upstream providers:")
    for key in sorted(ADAPTERS):
        label = get_adapter(key).display_name
        print(f" {key} — {label}")
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
def cmd_proxy(args: Any) -> int:
    """Route ``hermes proxy <subcommand>`` to its handler.

    ``start``, ``status`` and ``providers`` (alias ``list``) are dispatched
    to their handlers. Any other value, including no subcommand at all,
    prints a short usage summary to stderr and returns 0.
    """
    handlers = {
        "start": cmd_proxy_start,
        "status": cmd_proxy_status,
        "providers": cmd_proxy_list_providers,
        "list": cmd_proxy_list_providers,
    }
    selected = getattr(args, "proxy_command", None)
    if selected in handlers:
        return handlers[selected](args)

    # No recognised subcommand: fall back to the short help text.
    usage = (
        "hermes proxy — local OpenAI-compatible proxy that attaches your\n"
        "OAuth-authenticated provider credentials to outbound requests.\n"
        "\n"
        "Subcommands:\n"
        " hermes proxy start [--provider nous] [--host 127.0.0.1] [--port 8645]\n"
        " Run the proxy in the foreground.\n"
        " hermes proxy status\n"
        " Show which upstream adapters are ready.\n"
        " hermes proxy providers\n"
        " List available upstream providers.\n"
    )
    print(usage, file=sys.stderr)
    return 0
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"cmd_proxy",
|
||||||
|
"cmd_proxy_start",
|
||||||
|
"cmd_proxy_status",
|
||||||
|
"cmd_proxy_list_providers",
|
||||||
|
]
|
||||||
265
hermes_cli/proxy/server.py
Normal file
265
hermes_cli/proxy/server.py
Normal file
|
|
@ -0,0 +1,265 @@
|
||||||
|
"""HTTP server that forwards OpenAI-compatible requests to a configured upstream.
|
||||||
|
|
||||||
|
Listens on ``http://<host>:<port>/v1/<path>`` and forwards each request to
|
||||||
|
``<upstream-base-url>/<path>`` with the client's ``Authorization`` header
|
||||||
|
replaced by a freshly-resolved bearer from the configured adapter. The
|
||||||
|
response is streamed back unmodified, preserving SSE.
|
||||||
|
|
||||||
|
The server is intentionally minimal: it does NOT mediate, log, transform,
|
||||||
|
or rewrite request/response bodies. It's a credential-attaching forwarder.
|
||||||
|
"""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import logging
|
||||||
|
import signal
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
try:
|
||||||
|
import aiohttp
|
||||||
|
from aiohttp import web
|
||||||
|
AIOHTTP_AVAILABLE = True
|
||||||
|
except ImportError:
|
||||||
|
aiohttp = None # type: ignore[assignment]
|
||||||
|
web = None # type: ignore[assignment]
|
||||||
|
AIOHTTP_AVAILABLE = False
|
||||||
|
|
||||||
|
from hermes_cli.proxy.adapters.base import UpstreamAdapter
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Headers we strip when forwarding to the upstream. ``host``/``content-length``
|
||||||
|
# are recomputed by aiohttp; ``authorization`` is replaced with our bearer.
|
||||||
|
# Everything else (content-type, accept, user-agent, x-* headers) passes through.
|
||||||
|
_HOP_BY_HOP_HEADERS = frozenset(
|
||||||
|
{
|
||||||
|
"host",
|
||||||
|
"content-length",
|
||||||
|
"connection",
|
||||||
|
"keep-alive",
|
||||||
|
"proxy-authenticate",
|
||||||
|
"proxy-authorization",
|
||||||
|
"te",
|
||||||
|
"trailers",
|
||||||
|
"transfer-encoding",
|
||||||
|
"upgrade",
|
||||||
|
"authorization", # we replace this one
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
DEFAULT_PORT = 8645
|
||||||
|
DEFAULT_HOST = "127.0.0.1"
|
||||||
|
|
||||||
|
|
||||||
|
def _json_error(status: int, message: str, code: str = "proxy_error") -> "web.Response":
    """Build a JSON error response shaped like an OpenAI API error.

    ``code`` is used for both the ``type`` and ``code`` fields of the
    ``error`` object; ``status`` becomes the HTTP status.
    """
    detail = {"message": message, "type": code, "code": code}
    return web.json_response({"error": detail}, status=status)
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_request_headers(headers: "aiohttp.typedefs.LooseHeaders") -> dict:
    """Copy the inbound headers, leaving out hop-by-hop and auth headers.

    Names are compared case-insensitively against ``_HOP_BY_HOP_HEADERS``.
    When a name occurs more than once, the last value is the one kept.
    """
    return {
        name: value
        for name, value in headers.items()
        if name.lower() not in _HOP_BY_HOP_HEADERS
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _filter_response_headers(headers) -> dict:
    """Copy the upstream response headers, leaving out hop-by-hop headers.

    ``Content-Encoding`` and ``Content-Length`` are dropped as well, so
    that aiohttp sets them itself when the body is streamed back.
    """
    # Content-Length is also in _HOP_BY_HOP_HEADERS; it is repeated here so
    # both body-framing headers are visibly excluded.
    body_framing = ("content-encoding", "content-length")
    kept = {}
    for name, value in headers.items():
        lowered = name.lower()
        if lowered in _HOP_BY_HOP_HEADERS or lowered in body_framing:
            continue
        kept[name] = value
    return kept
|
||||||
|
|
||||||
|
|
||||||
|
def create_app(adapter: UpstreamAdapter) -> "web.Application":
    """Build the aiohttp application bound to a specific upstream adapter.

    Routes registered:

    - ``GET /health`` — answered locally, never forwarded.
    - any method on ``/v1/{tail}`` — forwarded upstream when ``/{tail}`` is
      in ``adapter.allowed_paths``; otherwise answered with a 404.

    Args:
        adapter: upstream adapter that supplies the allowed paths and the
            per-request credential.

    Returns:
        The configured ``web.Application``.

    Raises:
        RuntimeError: if aiohttp is not installed.
    """
    if not AIOHTTP_AVAILABLE:
        raise RuntimeError(
            "aiohttp is required for `hermes proxy`. Install with: "
            "pip install 'hermes-agent[messaging]' or `pip install aiohttp`."
        )

    app = web.Application()
    # AppKey ensures forward-compat with future aiohttp versions that strip
    # bare-string keys.
    _adapter_key = web.AppKey("adapter", UpstreamAdapter)
    app[_adapter_key] = adapter

    async def handle_health(request: "web.Request") -> "web.Response":
        return web.json_response(
            {
                "status": "ok",
                "upstream": adapter.display_name,
                "authenticated": adapter.is_authenticated(),
            }
        )

    async def handle_models_fallback(request: "web.Request") -> "web.Response":
        # Most clients hit /v1/models on startup. If the upstream doesn't
        # serve /models, synthesize a minimal response so clients don't
        # crash. The actual forwarding path handles /models when allowed.
        # NOTE(review): this handler is not registered on the router below,
        # so it is currently unreachable — confirm whether it should be
        # wired up for adapters that do not allow /models.
        return web.json_response(
            {
                "object": "list",
                "data": [],
            }
        )

    async def handle_proxy(request: "web.Request") -> "web.StreamResponse":
        # Extract the path *after* /v1
        rel_path = "/" + request.match_info.get("tail", "").lstrip("/")

        if rel_path not in adapter.allowed_paths:
            allowed = ", ".join(sorted(adapter.allowed_paths))
            return _json_error(
                404,
                f"Path /v1{rel_path} is not forwarded by this proxy. "
                f"Allowed: {allowed}",
                code="path_not_allowed",
            )

        # get_credential() is synchronous and may perform a blocking token
        # refresh; run it in the default executor so other in-flight
        # requests keep being served while it completes.
        try:
            loop = asyncio.get_running_loop()
            cred = await loop.run_in_executor(None, adapter.get_credential)
        except Exception as exc:
            logger.warning("proxy: credential resolution failed: %s", exc)
            return _json_error(401, str(exc), code="upstream_auth_failed")

        upstream_url = f"{cred.base_url.rstrip('/')}{rel_path}"
        # Preserve query string verbatim.
        if request.query_string:
            upstream_url = f"{upstream_url}?{request.query_string}"

        # Forward body verbatim. Read into memory once — request bodies for
        # chat/completions/embeddings are small (<1MB typically). If we ever
        # need to forward large multipart uploads we'll switch to streaming
        # the request body too.
        body = await request.read()

        fwd_headers = _filter_request_headers(request.headers)
        fwd_headers["Authorization"] = f"{cred.token_type} {cred.bearer}"

        logger.debug(
            "proxy: forwarding %s %s -> %s (body=%d bytes)",
            request.method, rel_path, upstream_url, len(body),
        )

        # Use a per-request session so connection state doesn't leak between
        # clients. Could be optimized to a shared session later.
        timeout = aiohttp.ClientTimeout(total=None, sock_connect=15, sock_read=300)
        try:
            session = aiohttp.ClientSession(timeout=timeout)
        except Exception as exc:  # pragma: no cover - aiohttp setup issue
            return _json_error(500, f"proxy session init failed: {exc}")

        try:
            upstream_resp = await session.request(
                request.method,
                upstream_url,
                data=body if body else None,
                headers=fwd_headers,
                allow_redirects=False,
            )
        except aiohttp.ClientError as exc:
            await session.close()
            logger.warning("proxy: upstream connection failed: %s", exc)
            return _json_error(502, f"upstream connection failed: {exc}",
                               code="upstream_unreachable")
        except asyncio.TimeoutError:
            await session.close()
            return _json_error(504, "upstream request timed out",
                               code="upstream_timeout")
        except BaseException:
            # Anything else (including cancellation) must not leak the
            # session; close it and let the exception propagate.
            await session.close()
            raise

        # Stream response back. Headers first, then chunked body. The outer
        # try/finally releases the upstream response and session on every
        # exit path, including a failure while sending the headers.
        try:
            resp = web.StreamResponse(
                status=upstream_resp.status,
                headers=_filter_response_headers(upstream_resp.headers),
            )
            try:
                await resp.prepare(request)
                async for chunk in upstream_resp.content.iter_any():
                    if chunk:
                        await resp.write(chunk)
                await resp.write_eof()
            except (aiohttp.ClientError, asyncio.TimeoutError, ConnectionError) as exc:
                # Upstream failed mid-stream or the client went away. The
                # status line is already sent, so all we can do is log.
                # CancelledError is deliberately not caught here so task
                # cancellation propagates after cleanup.
                logger.warning("proxy: streaming interrupted: %s", exc)
            return resp
        finally:
            upstream_resp.release()
            await session.close()

    # /health doesn't go through the upstream
    app.router.add_get("/health", handle_health)
    # Catch-all under /v1 — forwards if the path is allowed.
    app.router.add_route("*", "/v1/{tail:.*}", handle_proxy)

    return app
|
||||||
|
|
||||||
|
|
||||||
|
async def run_server(
    adapter: UpstreamAdapter,
    host: str = DEFAULT_HOST,
    port: int = DEFAULT_PORT,
    shutdown_event: Optional[asyncio.Event] = None,
) -> None:
    """Run the proxy in the current event loop until shutdown_event is set.

    If shutdown_event is None, a private event is created and wired to
    SIGINT/SIGTERM, so the server runs until it is signalled or cancelled.

    Args:
        adapter: Upstream adapter handed to ``create_app``.
        host: Interface to bind the listening socket to.
        port: TCP port to bind.
        shutdown_event: Optional externally owned event; setting it stops
            the server.

    Raises:
        RuntimeError: If aiohttp is not installed.
    """
    if not AIOHTTP_AVAILABLE:
        raise RuntimeError(
            "aiohttp is required for `hermes proxy`. Install with: "
            "pip install 'hermes-agent[messaging]' or `pip install aiohttp`."
        )

    app = create_app(adapter)
    runner = web.AppRunner(app, access_log=None)
    await runner.setup()

    # Everything after setup() runs under try/finally so the runner is torn
    # down even when binding the socket fails (e.g. the port is already in
    # use) rather than only after a clean wait.
    try:
        site = web.TCPSite(runner, host=host, port=port)
        await site.start()

        logger.info(
            "proxy: listening on http://%s:%d/v1 -> %s",
            host, port, adapter.display_name,
        )

        stop_event = shutdown_event or asyncio.Event()

        # Wire signal handlers when we own the loop's lifetime.
        if shutdown_event is None:
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGINT, signal.SIGTERM):
                try:
                    loop.add_signal_handler(sig, stop_event.set)
                except NotImplementedError:
                    # Windows / restricted environments — Ctrl+C will still
                    # raise KeyboardInterrupt and unwind us.
                    pass

        await stop_event.wait()
    finally:
        logger.info("proxy: shutting down")
        await runner.cleanup()
|
||||||
|
|
||||||
|
|
||||||
|
__all__ = [
|
||||||
|
"create_app",
|
||||||
|
"run_server",
|
||||||
|
"DEFAULT_HOST",
|
||||||
|
"DEFAULT_PORT",
|
||||||
|
"AIOHTTP_AVAILABLE",
|
||||||
|
]
|
||||||
512
tests/hermes_cli/test_proxy.py
Normal file
512
tests/hermes_cli/test_proxy.py
Normal file
|
|
@ -0,0 +1,512 @@
|
||||||
|
"""Tests for the `hermes proxy` subcommand and its upstream adapters."""
|
||||||
|
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import asyncio
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
import threading
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Any, Dict
|
||||||
|
from unittest.mock import MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from hermes_cli.proxy.adapters import ADAPTERS, get_adapter
|
||||||
|
from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
|
||||||
|
from hermes_cli.proxy.adapters.nous_portal import NousPortalAdapter
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Adapter registry
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_registry_lists_nous():
    """The adapter registry contains an entry keyed ``nous``."""
    provider_key = "nous"
    assert provider_key in ADAPTERS
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_adapter_returns_instance():
    """get_adapter("nous") yields a NousPortalAdapter, which is an UpstreamAdapter."""
    resolved = get_adapter("nous")
    for expected_type in (NousPortalAdapter, UpstreamAdapter):
        assert isinstance(resolved, expected_type)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_adapter_case_insensitive():
    """Provider names resolve regardless of letter case or surrounding spaces."""
    for spelling in ("NOUS", " Nous "):
        assert isinstance(get_adapter(spelling), NousPortalAdapter)
|
||||||
|
|
||||||
|
|
||||||
|
def test_get_adapter_unknown_provider_raises():
    """An unregistered provider name raises ValueError that mentions the name."""
    unsupported = "anthropic"  # not yet implemented
    with pytest.raises(ValueError, match=unsupported):
        get_adapter(unsupported)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# NousPortalAdapter
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _write_auth_store(hermes_home: Path, nous_state: Dict[str, Any]) -> Path:
    """Create ``auth.json`` under *hermes_home* holding *nous_state* for ``nous``.

    Returns the path of the file that was written.
    """
    payload = {"version": 1, "providers": {"nous": nous_state}}
    target = hermes_home / "auth.json"
    target.write_text(json.dumps(payload))
    return target
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_metadata():
    """The Nous adapter reports its identifiers and the expected allowed paths."""
    portal = NousPortalAdapter()
    assert portal.name == "nous"
    assert portal.display_name == "Nous Portal"
    for required_path in ("/chat/completions", "/embeddings",
                          "/completions", "/models"):
        assert required_path in portal.allowed_paths
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_not_authenticated_when_no_auth_file(tmp_path, monkeypatch):
    """With an empty HERMES_HOME (no auth.json) the adapter is unauthenticated."""
    # conftest is expected to set HERMES_HOME already; pin it here regardless.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    logged_in = NousPortalAdapter().is_authenticated()
    assert not logged_in
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_not_authenticated_when_provider_missing(tmp_path, monkeypatch):
    """An auth.json with an empty ``providers`` map leaves the adapter unauthenticated."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    empty_store = {"version": 1, "providers": {}}
    (tmp_path / "auth.json").write_text(json.dumps(empty_store))
    logged_in = NousPortalAdapter().is_authenticated()
    assert not logged_in
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_authenticated_with_agent_key(tmp_path, monkeypatch):
    """A stored agent_key (with expiry and base URL) counts as authenticated."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    stored_state = {
        "agent_key": "ov-test-key",
        "agent_key_expires_at": "2099-01-01T00:00:00Z",
        "inference_base_url": "https://inference-api.nousresearch.com/v1",
    }
    _write_auth_store(tmp_path, stored_state)
    assert NousPortalAdapter().is_authenticated()
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_authenticated_with_refresh_token_only(tmp_path, monkeypatch):
    """Access + refresh tokens without an agent_key still count as authenticated."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    tokens_only = {"access_token": "access-tok", "refresh_token": "refresh-tok"}
    _write_auth_store(tmp_path, tokens_only)
    assert NousPortalAdapter().is_authenticated()
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_get_credential_refreshes_and_persists(tmp_path, monkeypatch):
    """get_credential() calls the refresh helper once, returns the minted key,
    and writes the refreshed state back to auth.json."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))

    inference_url = "https://inference-api.nousresearch.com/v1"
    initial_state = {
        "access_token": "access-tok",
        "refresh_token": "refresh-tok",
        "client_id": "hermes-cli",
        "portal_base_url": "https://portal.nousresearch.com",
        "inference_base_url": inference_url,
    }
    _write_auth_store(tmp_path, initial_state)

    # What the (patched) refresh helper hands back: same state plus a key.
    refreshed_state = {
        **initial_state,
        "agent_key": "minted-bearer",
        "agent_key_expires_at": "2099-01-01T00:00:00Z",
    }

    refresh_target = "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state"
    with patch(refresh_target, return_value=refreshed_state) as mock_refresh:
        cred = NousPortalAdapter().get_credential()

    mock_refresh.assert_called_once()
    assert cred.bearer == "minted-bearer"
    assert cred.base_url == inference_url
    assert cred.expires_at == "2099-01-01T00:00:00Z"
    assert cred.token_type == "Bearer"

    # The refreshed state must have been persisted to disk.
    on_disk = json.loads((tmp_path / "auth.json").read_text())
    assert on_disk["providers"]["nous"]["agent_key"] == "minted-bearer"
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_get_credential_raises_when_not_logged_in(tmp_path, monkeypatch):
    """Without stored credentials, get_credential() points the user at the login command."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    portal = NousPortalAdapter()
    with pytest.raises(RuntimeError, match="hermes login nous"):
        portal.get_credential()
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_get_credential_raises_on_refresh_failure(tmp_path, monkeypatch):
    """A RuntimeError from the refresh helper surfaces with its message intact."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_auth_store(
        tmp_path,
        {"access_token": "access-tok", "refresh_token": "refresh-tok"},
    )

    refresh_target = "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state"
    failure = RuntimeError("Refresh session has been revoked")
    with patch(refresh_target, side_effect=failure):
        portal = NousPortalAdapter()
        with pytest.raises(RuntimeError, match="Refresh session has been revoked"):
            portal.get_credential()
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_get_credential_raises_when_no_agent_key_returned(tmp_path, monkeypatch):
    """A refresh that succeeds but yields no agent_key produces a clear error."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_auth_store(
        tmp_path,
        {"access_token": "access-tok", "refresh_token": "refresh-tok"},
    )

    refresh_target = "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state"
    keyless_state = {"access_token": "a", "refresh_token": "r"}
    with patch(refresh_target, return_value=keyless_state):
        portal = NousPortalAdapter()
        with pytest.raises(RuntimeError, match="did not return a usable agent_key"):
            portal.get_credential()
|
||||||
|
|
||||||
|
|
||||||
|
def test_nous_adapter_concurrent_refresh_serialized(tmp_path, monkeypatch):
    """Parallel get_credential() calls must go through the refresh helper one at a time."""
    import time

    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_auth_store(tmp_path, {"access_token": "a", "refresh_token": "r"})

    refresh_threads: list = []          # thread ident of every refresh call
    busy = threading.Event()            # set while a refresh is executing
    overlapped = threading.Event()      # set if a refresh starts while busy
    minted = [0]                        # how many keys have been handed out
    minted_guard = threading.Lock()

    def fake_refresh(state, **kwargs):
        # Entering while another call is still running means the adapter's
        # lock did not serialize us.
        if busy.is_set():
            overlapped.set()
        busy.set()
        try:
            refresh_threads.append(threading.current_thread().ident)
            # Hold the "refresh" open long enough to expose any race window.
            time.sleep(0.05)
            with minted_guard:
                minted[0] += 1
                serial = minted[0]
            return {
                **state,
                "agent_key": f"key-{serial}",
                "agent_key_expires_at": "2099-01-01T00:00:00Z",
                "inference_base_url": "https://inference-api.nousresearch.com/v1",
            }
        finally:
            busy.clear()

    adapter = NousPortalAdapter()
    bearers: list = []
    failures: list = []

    def fetch_bearer():
        try:
            bearers.append(adapter.get_credential().bearer)
        except Exception as exc:  # pragma: no cover - shouldn't happen
            failures.append(exc)

    with patch(
        "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state",
        side_effect=fake_refresh,
    ):
        pool = [threading.Thread(target=fetch_bearer) for _ in range(3)]
        for thread in pool:
            thread.start()
        for thread in pool:
            thread.join()

    assert not failures, f"workers errored: {failures}"
    assert len(bearers) == 3
    assert len(refresh_threads) == 3
    assert not overlapped.is_set(), "refresh calls overlapped — lock is broken"
    assert all(bearer.startswith("key-") for bearer in bearers)
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Server: path filtering + forwarding
|
||||||
|
#
|
||||||
|
# We run the proxy AND a fake upstream as real aiohttp servers on ephemeral
|
||||||
|
# ports. Avoids pytest-aiohttp's fixtures (extra dependency for one test file).
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
aiohttp = pytest.importorskip("aiohttp")
|
||||||
|
from aiohttp import web # noqa: E402
|
||||||
|
|
||||||
|
from hermes_cli.proxy.server import create_app # noqa: E402
|
||||||
|
|
||||||
|
|
||||||
|
class FakeAdapter(UpstreamAdapter):
    """Test double that hands out a fixed credential and never touches disk.

    ``calls`` counts get_credential() invocations; constructing with
    ``raise_on_credential=True`` makes every invocation raise RuntimeError.
    """

    def __init__(self, base_url: str, bearer: str = "test-bearer",
                 allowed=None, raise_on_credential=False):
        self.calls = 0
        self._raise = raise_on_credential
        # Default to the chat-completions path when no allow-list is given.
        self._allowed = frozenset(allowed or ["/chat/completions"])
        self._bearer = bearer
        self._base_url = base_url

    @property
    def name(self):
        return "fake"

    @property
    def display_name(self):
        return "Fake Provider"

    @property
    def allowed_paths(self):
        return self._allowed

    def is_authenticated(self):
        return True

    def get_credential(self):
        self.calls += 1
        if not self._raise:
            return UpstreamCredential(
                bearer=self._bearer,
                base_url=self._base_url,
                expires_at="2099-01-01T00:00:00Z",
            )
        raise RuntimeError("simulated auth failure")
|
||||||
|
|
||||||
|
|
||||||
|
async def _start_runner(app: "web.Application"):
    """Spin up an aiohttp app on an ephemeral localhost port.

    Returns ``(runner, base_url)``. The caller owns the runner and must
    ``await runner.cleanup()`` when finished.
    """
    runner = web.AppRunner(app, access_log=None)
    await runner.setup()
    site = web.TCPSite(runner, host="127.0.0.1", port=0)
    await site.start()
    # Ask the runner for its bound addresses via the public API instead of
    # reaching into the private ``site._server``; each entry is the socket's
    # getsockname() tuple, so index 1 is the OS-assigned port.
    port = runner.addresses[0][1]
    return runner, f"http://127.0.0.1:{port}"
|
||||||
|
|
||||||
|
|
||||||
|
def _build_fake_upstream(captured: Dict[str, Any]) -> "web.Application":
    """Build a stand-in upstream app that records what it receives.

    Every request to the echo routes is appended to ``captured["requests"]``
    (method, path, Authorization header, decoded body). ``/v1/sse`` streams
    three fixed server-sent-event chunks.
    """

    async def echo(request):
        raw = await request.read()
        record = {
            "method": request.method,
            "path": request.path,
            "auth": request.headers.get("Authorization"),
            "body": raw.decode("utf-8") if raw else "",
        }
        captured["requests"].append(record)
        return web.json_response({"echoed": True, "path": request.path})

    async def sse(request):
        stream = web.StreamResponse(
            status=200, headers={"Content-Type": "text/event-stream"},
        )
        await stream.prepare(request)
        events = (b"data: hello\n\n", b"data: world\n\n", b"data: [DONE]\n\n")
        for event in events:
            await stream.write(event)
        await stream.write_eof()
        return stream

    upstream = web.Application()
    routes = (
        ("/v1/chat/completions", echo),
        ("/v1/embeddings", echo),
        ("/v1/sse", sse),
    )
    for route_path, handler in routes:
        upstream.router.add_route("*", route_path, handler)
    return upstream
|
||||||
|
|
||||||
|
|
||||||
|
def test_server_forwards_chat_completions():
    """A chat-completions POST reaches the upstream with the adapter's bearer and the body."""

    async def scenario():
        captured: Dict[str, Any] = {"requests": []}
        upstream_runner, upstream_base = await _start_runner(_build_fake_upstream(captured))
        adapter = FakeAdapter(f"{upstream_base}/v1", bearer="real-portal-key")
        proxy_runner, proxy_base = await _start_runner(create_app(adapter))

        request_body = {
            "model": "Hermes-4-70B",
            "messages": [{"role": "user", "content": "hi"}],
        }
        try:
            async with aiohttp.ClientSession() as client:
                async with client.post(
                    f"{proxy_base}/v1/chat/completions",
                    json=request_body,
                    headers={"Authorization": "Bearer client-dummy-key"},
                ) as reply:
                    assert reply.status == 200
                    decoded = await reply.json()
                    assert decoded["echoed"] is True

            assert len(captured["requests"]) == 1
            seen = captured["requests"][0]
            assert seen["auth"] == "Bearer real-portal-key"
            assert "Hermes-4-70B" in seen["body"]
        finally:
            await proxy_runner.cleanup()
            await upstream_runner.cleanup()

    asyncio.run(scenario())
|
||||||
|
|
||||||
|
|
||||||
|
def test_server_rejects_disallowed_path():
    """A /v1 path outside the adapter's allow-list yields 404 ``path_not_allowed``."""

    async def scenario():
        adapter = FakeAdapter("http://unused.example/v1", allowed=["/chat/completions"])
        proxy_runner, proxy_base = await _start_runner(create_app(adapter))
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(f"{proxy_base}/v1/random/endpoint") as reply:
                    assert reply.status == 404
                    error = (await reply.json())["error"]
                    assert error["type"] == "path_not_allowed"
                    assert "/chat/completions" in error["message"]
        finally:
            await proxy_runner.cleanup()

    asyncio.run(scenario())
|
||||||
|
|
||||||
|
|
||||||
|
def test_server_returns_401_when_adapter_fails():
    """A credential failure in the adapter becomes a 401 ``upstream_auth_failed``."""

    async def scenario():
        adapter = FakeAdapter("http://unused.example/v1", raise_on_credential=True)
        proxy_runner, proxy_base = await _start_runner(create_app(adapter))
        try:
            async with aiohttp.ClientSession() as client:
                async with client.post(f"{proxy_base}/v1/chat/completions", json={}) as reply:
                    assert reply.status == 401
                    error = (await reply.json())["error"]
                    assert error["type"] == "upstream_auth_failed"
                    assert "simulated auth failure" in error["message"]
        finally:
            await proxy_runner.cleanup()

    asyncio.run(scenario())
|
||||||
|
|
||||||
|
|
||||||
|
def test_server_health_endpoint():
    """GET /health reports status, the upstream display name, and auth state."""

    async def scenario():
        adapter = FakeAdapter("http://unused.example/v1")
        proxy_runner, proxy_base = await _start_runner(create_app(adapter))
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(f"{proxy_base}/health") as reply:
                    assert reply.status == 200
                    health = await reply.json()
                    assert health["status"] == "ok"
                    assert health["upstream"] == "Fake Provider"
                    assert health["authenticated"] is True
        finally:
            await proxy_runner.cleanup()

    asyncio.run(scenario())
|
||||||
|
|
||||||
|
|
||||||
|
def test_server_streams_sse():
    """Server-sent events from the upstream arrive through the proxy."""

    async def scenario():
        captured: Dict[str, Any] = {"requests": []}
        upstream_runner, upstream_base = await _start_runner(_build_fake_upstream(captured))
        adapter = FakeAdapter(f"{upstream_base}/v1", allowed=["/sse"])
        proxy_runner, proxy_base = await _start_runner(create_app(adapter))
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(f"{proxy_base}/v1/sse") as reply:
                    assert reply.status == 200
                    pieces = [piece async for piece in reply.content.iter_any()]
                    stream_bytes = b"".join(pieces)
                    assert b"data: hello" in stream_bytes
                    assert b"data: [DONE]" in stream_bytes
        finally:
            await proxy_runner.cleanup()
            await upstream_runner.cleanup()

    asyncio.run(scenario())
|
||||||
|
|
||||||
|
|
||||||
|
def test_server_strips_client_auth_header():
    """The client's Authorization header MUST NOT reach the upstream."""

    async def scenario():
        captured: Dict[str, Any] = {"requests": []}
        upstream_runner, upstream_base = await _start_runner(_build_fake_upstream(captured))
        adapter = FakeAdapter(f"{upstream_base}/v1", bearer="ours")
        proxy_runner, proxy_base = await _start_runner(create_app(adapter))
        try:
            async with aiohttp.ClientSession() as client:
                async with client.post(
                    f"{proxy_base}/v1/chat/completions",
                    json={},
                    headers={"Authorization": "Bearer SHOULD_NOT_LEAK"},
                ) as reply:
                    await reply.read()
            upstream_auth = captured["requests"][0]["auth"]
            assert upstream_auth == "Bearer ours"
            assert "SHOULD_NOT_LEAK" not in upstream_auth
        finally:
            await proxy_runner.cleanup()
            await upstream_runner.cleanup()

    asyncio.run(scenario())
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# CLI handlers
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_cmd_proxy_status_runs(capsys, tmp_path, monkeypatch):
    """`proxy status` exits 0 and lists the Nous adapter as not logged in."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from hermes_cli.proxy.cli import cmd_proxy_status

    exit_code = cmd_proxy_status(MagicMock())
    assert exit_code == 0

    printed = capsys.readouterr().out
    for fragment in ("nous", "Nous Portal", "not logged in"):
        assert fragment in printed
|
||||||
|
|
||||||
|
|
||||||
|
def test_cmd_proxy_providers_runs(capsys):
    """`proxy providers` exits 0 and prints the Nous adapter's key and display name."""
    from hermes_cli.proxy.cli import cmd_proxy_list_providers

    exit_code = cmd_proxy_list_providers(MagicMock())
    assert exit_code == 0

    printed = capsys.readouterr().out
    for fragment in ("nous", "Nous Portal"):
        assert fragment in printed
|
||||||
|
|
||||||
|
|
||||||
|
def test_cmd_proxy_start_refuses_unknown_provider(capsys):
    """`proxy start` with an unregistered provider returns 2 and names it on stderr."""
    from hermes_cli.proxy.cli import cmd_proxy_start

    cli_args = MagicMock(provider="no-such-provider", host=None, port=None)
    exit_code = cmd_proxy_start(cli_args)
    assert exit_code == 2

    stderr_text = capsys.readouterr().err
    assert "no-such-provider" in stderr_text
|
||||||
|
|
||||||
|
|
||||||
|
def test_cmd_proxy_start_refuses_when_unauthenticated(capsys, tmp_path, monkeypatch):
    """`proxy start` without stored credentials returns 2 and suggests logging in."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from hermes_cli.proxy.cli import cmd_proxy_start

    cli_args = MagicMock(provider="nous", host=None, port=None)
    exit_code = cmd_proxy_start(cli_args)
    assert exit_code == 2

    stderr_text = capsys.readouterr().err
    assert "hermes login nous" in stderr_text
|
||||||
|
|
@ -40,6 +40,7 @@ hermes [global-options] <command> [subcommand/options]
|
||||||
| `hermes model` | Interactively choose the default provider and model. |
|
| `hermes model` | Interactively choose the default provider and model. |
|
||||||
| `hermes fallback` | Manage fallback providers tried when the primary model errors. |
|
| `hermes fallback` | Manage fallback providers tried when the primary model errors. |
|
||||||
| `hermes gateway` | Run or manage the messaging gateway service. |
|
| `hermes gateway` | Run or manage the messaging gateway service. |
|
||||||
|
| `hermes proxy` | Local OpenAI-compatible proxy that attaches OAuth provider credentials. See [Subscription Proxy](../user-guide/features/subscription-proxy.md). |
|
||||||
| `hermes lsp` | Manage Language Server Protocol integration (semantic diagnostics for write_file/patch). |
|
| `hermes lsp` | Manage Language Server Protocol integration (semantic diagnostics for write_file/patch). |
|
||||||
| `hermes setup` | Interactive setup wizard for all or part of the configuration. |
|
| `hermes setup` | Interactive setup wizard for all or part of the configuration. |
|
||||||
| `hermes whatsapp` | Configure and pair the WhatsApp bridge. |
|
| `hermes whatsapp` | Configure and pair the WhatsApp bridge. |
|
||||||
|
|
|
||||||
203
website/docs/user-guide/features/subscription-proxy.md
Normal file
203
website/docs/user-guide/features/subscription-proxy.md
Normal file
|
|
@ -0,0 +1,203 @@
|
||||||
|
---
|
||||||
|
sidebar_position: 15
|
||||||
|
title: "Subscription Proxy"
|
||||||
|
description: "Use your Nous Portal subscription (or other OAuth provider) as an OpenAI-compatible endpoint for external apps"
|
||||||
|
---
|
||||||
|
|
||||||
|
# Subscription Proxy
|
||||||
|
|
||||||
|
The subscription proxy is a local HTTP server that lets external apps —
|
||||||
|
OpenViking, Karakeep, Open WebUI, anything that speaks OpenAI-compatible
|
||||||
|
chat completions — use your Hermes-managed provider subscription as their
|
||||||
|
LLM endpoint. The proxy attaches the right credentials (refreshing them
|
||||||
|
automatically) so the app never needs a static API key.
|
||||||
|
|
||||||
|
This is different from the [API server](./api-server.md):
|
||||||
|
|
||||||
|
| | API server | Subscription proxy |
|
||||||
|
|---|---|---|
|
||||||
|
| What it serves | Your agent (full toolset, memory, skills) | Raw model inference |
|
||||||
|
| Use case | "Use Hermes as a chat backend" | "Use my Portal sub from another app" |
|
||||||
|
| Auth | Your `API_SERVER_KEY` | Any bearer (proxy attaches the real one) |
|
||||||
|
| Tool calls | Yes — the agent runs tools | No — passthrough only |
|
||||||
|
|
||||||
|
Use the API server when you want the **agent** as a backend. Use the
|
||||||
|
proxy when you just want **the model** through your subscription.
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
### 1. Log into your provider (one-time)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes login nous
|
||||||
|
```
|
||||||
|
|
||||||
|
This opens your browser for the Nous Portal OAuth flow. Hermes stores
|
||||||
|
the refresh token in `~/.hermes/auth.json` — the same place all Hermes
|
||||||
|
provider logins live.
|
||||||
|
|
||||||
|
### 2. Start the proxy
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes proxy start
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
Starting Hermes proxy for Nous Portal
|
||||||
|
Listening on: http://127.0.0.1:8645/v1
|
||||||
|
Forwarding to: (resolved per-request from your subscription)
|
||||||
|
Use any bearer token in the client — the proxy attaches your real credential.
|
||||||
|
```
|
||||||
|
|
||||||
|
Leave this running in the foreground. Use `tmux`, `nohup`, or a systemd
|
||||||
|
unit if you want it to survive logout.
|
||||||
|
|
||||||
|
### 3. Point your app at it
|
||||||
|
|
||||||
|
Any OpenAI-compatible app config takes the same triple:
|
||||||
|
|
||||||
|
```
|
||||||
|
Base URL: http://127.0.0.1:8645/v1
|
||||||
|
API key: anything (e.g. "sk-unused")
|
||||||
|
Model: Hermes-4-70B # or Hermes-4.3-36B, Hermes-4-405B
|
||||||
|
```
|
||||||
|
|
||||||
|
The proxy ignores the `Authorization` header from your app and attaches
|
||||||
|
your real Portal credential to the upstream request. Refreshes happen
|
||||||
|
automatically when the bearer approaches expiry.
|
||||||
|
|
||||||
|
## Available providers
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes proxy providers
|
||||||
|
```
|
||||||
|
|
||||||
|
Currently shipped: `nous` (Nous Portal). More OAuth providers can be
|
||||||
|
added by implementing the `UpstreamAdapter` interface in
|
||||||
|
`hermes_cli/proxy/adapters/`.
|
||||||
|
|
||||||
|
## Check status
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes proxy status
|
||||||
|
```
|
||||||
|
|
||||||
|
```
|
||||||
|
Hermes proxy upstream adapters
|
||||||
|
|
||||||
|
[nous ] Nous Portal — ready (bearer expires 2026-05-15T06:43:21Z)
|
||||||
|
```
|
||||||
|
|
||||||
|
If you see `not logged in`, run `hermes login nous`. If you see
`credentials need attention`, your refresh token was revoked. This is
rare; it happens if you signed out from the Portal web UI. To fix it,
re-run `hermes login nous`.
|
||||||
|
|
||||||
|
## Allowed paths
|
||||||
|
|
||||||
|
The proxy only forwards paths the upstream actually serves. For Nous
|
||||||
|
Portal:
|
||||||
|
|
||||||
|
| Path | Purpose |
|
||||||
|
|------|---------|
|
||||||
|
| `/v1/chat/completions` | Chat completions (streaming + non-streaming) |
|
||||||
|
| `/v1/completions` | Legacy text completions |
|
||||||
|
| `/v1/embeddings` | Embeddings |
|
||||||
|
| `/v1/models` | Model list |
|
||||||
|
|
||||||
|
Other paths (`/v1/images/generations`, `/v1/audio/speech`, etc.) return
|
||||||
|
404 with a clear error pointing at the allowed paths. This keeps stray
|
||||||
|
clients from leaking weird requests to the upstream.
|
||||||
|
|
||||||
|
## Configuring OpenViking to use Portal
|
||||||
|
|
||||||
|
[OpenViking](https://github.com/volcengine/OpenViking) is a context
|
||||||
|
database that needs an LLM provider for its VLM (vision/language model
|
||||||
|
used to extract memories) and embedding model. With the proxy, you can
|
||||||
|
point its `vlm.api_base` at your local proxy:
|
||||||
|
|
||||||
|
Edit `~/.openviking/ov.conf`:
|
||||||
|
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"vlm": {
|
||||||
|
"provider": "openai",
|
||||||
|
"model": "Hermes-4-70B",
|
||||||
|
"api_base": "http://127.0.0.1:8645/v1",
|
||||||
|
"api_key": "unused-proxy-attaches-real-creds"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
Then start your proxy in a terminal alongside `openviking-server`:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Terminal 1
|
||||||
|
hermes proxy start
|
||||||
|
|
||||||
|
# Terminal 2
|
||||||
|
openviking-server
|
||||||
|
```
|
||||||
|
|
||||||
|
OpenViking's VLM calls now flow through your Portal subscription. The
embedding model still has to be configured with its own provider: Portal
does serve `/v1/embeddings`, but which embedding models are available
depends on what your tier supports; check `portal.nousresearch.com/models`.
|
||||||
|
|
||||||
|
## Configuring Karakeep (or any bookmark/summarizer app)
|
||||||
|
|
||||||
|
[Karakeep](https://karakeep.app/) takes an OpenAI-compatible API for
|
||||||
|
bookmark summarization. In its config:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Karakeep .env
|
||||||
|
OPENAI_API_BASE_URL=http://127.0.0.1:8645/v1
|
||||||
|
OPENAI_API_KEY=any-non-empty-string
|
||||||
|
INFERENCE_TEXT_MODEL=Hermes-4-70B
|
||||||
|
```
|
||||||
|
|
||||||
|
Same pattern works for Open WebUI, LobeChat, NextChat, or any other
|
||||||
|
OpenAI-compatible client.
|
||||||
|
|
||||||
|
## Exposing on LAN
|
||||||
|
|
||||||
|
By default the proxy binds `127.0.0.1` (localhost only). To let other
|
||||||
|
machines on your network use it:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
hermes proxy start --host 0.0.0.0 --port 8645
|
||||||
|
```
|
||||||
|
|
||||||
|
⚠ **Be aware:** anyone on your network can now use your Portal
|
||||||
|
subscription. The proxy has no auth of its own — it accepts any bearer.
|
||||||
|
Use a firewall, VPN, or reverse proxy with proper auth if you expose
|
||||||
|
this beyond your trusted network.
|
||||||
|
|
||||||
|
## Rate limits
|
||||||
|
|
||||||
|
Your Portal tier's RPM/TPM limits apply across the whole proxy. The
|
||||||
|
proxy doesn't fan out or pool — it's a single bearer with your full
|
||||||
|
subscription quota. Monitor usage at
|
||||||
|
[portal.nousresearch.com](https://portal.nousresearch.com).
|
||||||
|
|
||||||
|
## Architecture
|
||||||
|
|
||||||
|
The proxy is intentionally minimal. Per request:
|
||||||
|
|
||||||
|
1. Receive `POST /v1/chat/completions` from your app
|
||||||
|
2. Look up the adapter's current credential (refresh if expiring)
|
||||||
|
3. Forward the request body verbatim, with `Authorization: Bearer <minted-key>`
|
||||||
|
4. Stream the response back unchanged (SSE preserved)
|
||||||
|
|
||||||
|
No transformation. No logging of request bodies. No agent loop. The
|
||||||
|
proxy is a credential-attaching pass-through.
|
||||||
|
|
||||||
|
## Future: more OAuth providers
|
||||||
|
|
||||||
|
The adapter system is pluggable. Adding a new provider (e.g.
|
||||||
|
HuggingFace, GitHub Copilot's chat endpoint, Anthropic via OAuth)
|
||||||
|
requires implementing `UpstreamAdapter` in
|
||||||
|
`hermes_cli/proxy/adapters/<provider>.py` and registering it in
|
||||||
|
`adapters/__init__.py`. Providers that aren't OpenAI-compatible at the
|
||||||
|
protocol level (Anthropic Messages API, for example) would need a
|
||||||
|
transformation layer, which is out of scope for the current shape.
|
||||||
|
|
@ -96,6 +96,7 @@ const sidebars: SidebarsConfig = {
|
||||||
items: [
|
items: [
|
||||||
'user-guide/features/web-dashboard',
|
'user-guide/features/web-dashboard',
|
||||||
'user-guide/features/extending-the-dashboard',
|
'user-guide/features/extending-the-dashboard',
|
||||||
|
'user-guide/features/subscription-proxy',
|
||||||
],
|
],
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue