feat(proxy): local OpenAI-compatible proxy for OAuth providers (#25969)

Adds 'hermes proxy start' — a local HTTP server that lets external apps (OpenViking, Karakeep, Open WebUI, ...) use a Hermes-managed provider subscription as their LLM endpoint. The proxy attaches the user's real OAuth-resolved credentials to each forwarded request, refreshing them automatically; the client can send any bearer token (it gets stripped). Ships with one adapter — Nous Portal. The UpstreamAdapter ABC and the registry in hermes_cli/proxy/adapters/ are designed so that additional OAuth providers can plug in by name without server changes. Commands: `hermes proxy start [--provider nous] [--host 127.0.0.1] [--port 8645]`; `hermes proxy status`; `hermes proxy providers`. Allowed Portal paths: /v1/chat/completions, /v1/completions, /v1/embeddings, /v1/models. Anything else returns 404 with a clear error pointing at the allowed list. aiohttp is gated the same way as in gateway/platforms/api_server.py (try-import, clean runtime error if missing). No new core dependency. Tests: 24 unit tests + 1 separate E2E test that spawns the real subprocess and verifies that the upstream receives the right bearer with the client's header stripped.
2026-05-18 04:41:56 +00:00 · 2026-05-14 15:40:48 -07:00 · 2026-05-14 15:40:48 -07:00 · ccb5aae0d2
commit ccb5aae0d2
parent 34fc94d1f4
11 changed files with 1466 additions and 1 deletions
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -1452,6 +1452,17 @@ def cmd_gateway(args):
    gateway_command(args)
 def cmd_proxy(args):
    """Entry point for ``hermes proxy``: delegate to the proxy CLI package.

    The delegate's return value doubles as the process exit status: a
    non-zero int is raised as ``SystemExit``; anything else returns normally.
    """
    # Imported at call time — the proxy package pulls in aiohttp, which only
    # exists for users who installed the optional extras.
    from hermes_cli.proxy import cli as _proxy_cli

    exit_code = _proxy_cli.cmd_proxy(args)
    if not isinstance(exit_code, int) or exit_code == 0:
        return
    raise SystemExit(exit_code)
 def cmd_whatsapp(args):
    """Set up WhatsApp: choose mode, configure, install bridge, pair via QR."""
    _require_tty("whatsapp")
@ -9385,7 +9396,7 @@ _BUILTIN_SUBCOMMANDS = frozenset(
        "config", "cron", "curator", "dashboard", "debug", "doctor",
        "dump", "fallback", "gateway", "hooks", "import", "insights",
        "kanban", "login", "logout", "logs", "lsp", "mcp", "memory",
-        "model", "pairing", "plugins", "profile", "sessions", "setup",
+        "model", "pairing", "plugins", "profile", "proxy", "sessions", "setup",
        "skills", "slack", "status", "tools", "uninstall", "update",
        "version", "webhook", "whatsapp", "chat",
        # Help-ish invocations — plugin commands not being listed in
@ -9727,6 +9738,51 @@ def main():
        help="Skip the confirmation prompt",
    )
    # =========================================================================
    # proxy command — local OpenAI-compatible proxy that attaches the user's
    # OAuth-authenticated provider credentials to outbound requests. Lets
    # external apps (OpenViking, Karakeep, Open WebUI, ...) ride a logged-in
    # subscription without copy-pasting static API keys.
    # =========================================================================
    proxy_parser = subparsers.add_parser(
        "proxy",
        help="Local OpenAI-compatible proxy to OAuth providers",
        description=(
            "Run a local HTTP server that forwards OpenAI-compatible requests "
            "to an OAuth-authenticated provider (e.g. Nous Portal). External "
            "apps can point at the proxy with any bearer token; the proxy "
            "attaches your real credentials."
        ),
    )
    proxy_subparsers = proxy_parser.add_subparsers(dest="proxy_command")
    proxy_start = proxy_subparsers.add_parser(
        "start", help="Run the proxy in the foreground"
    )
    proxy_start.add_argument(
        "--provider",
        default="nous",
        help="Upstream provider (default: nous). See `hermes proxy providers`.",
    )
    proxy_start.add_argument(
        "--host",
        default=None,
        help="Bind address (default: 127.0.0.1). Use 0.0.0.0 to expose on LAN.",
    )
    proxy_start.add_argument(
        "--port",
        type=int,
        default=None,
        help="Bind port (default: 8645)",
    )
    proxy_subparsers.add_parser(
        "status", help="Show which proxy upstreams are ready"
    )
    proxy_subparsers.add_parser(
        "providers", help="List available proxy upstream providers"
    )
    proxy_parser.set_defaults(func=cmd_proxy)
    gateway_parser.set_defaults(func=cmd_gateway)
    # =========================================================================
--- a/hermes_cli/proxy/init.py
+++ b/hermes_cli/proxy/init.py
@ -0,0 +1,20 @@
 """Local OpenAI-compatible proxy that forwards to OAuth-authenticated upstreams.
 Lets external apps (OpenViking, Karakeep, Open WebUI, ...) ride the user's
 already-logged-in provider subscription instead of needing a static API key
 copy-pasted into each app's config.
 The proxy listens on ``127.0.0.1:<port>``, accepts any bearer (the client's
 ``Authorization`` header is discarded), and attaches the user's real
 upstream credential to the forwarded request. The credential is refreshed
 automatically when it approaches expiry.
 First-class adapter:
  - ``nous`` — Nous Portal (https://inference-api.nousresearch.com/v1)
 Future adapters can plug in by implementing ``UpstreamAdapter``.
 """
 from hermes_cli.proxy.adapters.base import UpstreamAdapter
 __all__ = ["UpstreamAdapter"]
--- a/hermes_cli/proxy/adapters/init.py
+++ b/hermes_cli/proxy/adapters/init.py
@ -0,0 +1,35 @@
 """Upstream adapter registry for the local proxy server.
 Each adapter wraps a provider's OAuth state and exposes a uniform interface
 the proxy server can use to forward requests with a freshly-minted bearer
 token. See :class:`UpstreamAdapter` for the contract.
 """
 from typing import Dict, Type
 from hermes_cli.proxy.adapters.base import UpstreamAdapter
 from hermes_cli.proxy.adapters.nous_portal import NousPortalAdapter
 # Registry of available adapter classes keyed by provider name as used on
 # the ``hermes proxy start --provider <name>`` CLI flag. Keys are lowercase:
 # ``get_adapter`` strips and lowercases its argument before looking it up here.
 ADAPTERS: Dict[str, Type[UpstreamAdapter]] = {
    "nous": NousPortalAdapter,
 }
 def get_adapter(name: str) -> UpstreamAdapter:
    """Build a new adapter instance for the given provider name.

    The lookup ignores surrounding whitespace and letter case; a falsy
    ``name`` is treated as the empty string.

    Raises:
        ValueError: if ``name`` is not a registered adapter.
    """
    normalized = (name or "").strip().lower()
    if normalized in ADAPTERS:
        return ADAPTERS[normalized]()

    available = ", ".join(sorted(ADAPTERS)) or "(none)"
    raise ValueError(
        f"Unknown proxy upstream provider: {name!r}. Available: {available}"
    )
 __all__ = ["UpstreamAdapter", "ADAPTERS", "get_adapter"]
--- a/hermes_cli/proxy/adapters/base.py
+++ b/hermes_cli/proxy/adapters/base.py
@ -0,0 +1,94 @@
 """Abstract base for proxy upstream adapters.
 An :class:`UpstreamAdapter` represents one OAuth-authenticated provider the
 local proxy can forward requests to. The adapter is responsible for:
  - locating the user's auth state for that provider
  - refreshing/minting credentials when needed
  - reporting the resolved upstream base URL
  - declaring which request paths it accepts
 The proxy server is otherwise provider-agnostic.
 """
 from __future__ import annotations
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import FrozenSet, Optional
@dataclass(frozen=True)
class UpstreamCredential:
    """Immutable pairing of a resolved bearer token with its upstream base URL."""

    # Token to send upstream — the bare token, without the ``Bearer`` prefix.
    bearer: str
    # Upstream base URL, e.g. ``https://inference-api.nousresearch.com/v1``.
    base_url: str
    # Auth scheme — currently always ``Bearer`` for supported providers.
    token_type: str = "Bearer"
    # ISO-8601 expiry timestamp for the bearer, when known. Informational.
    expires_at: Optional[str] = None
 class UpstreamAdapter(ABC):
    """Interface every proxy upstream provider has to implement."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Key that selects this adapter on the CLI (e.g. ``"nous"``)."""

    @property
    @abstractmethod
    def display_name(self) -> str:
        """Provider name as shown to humans in logs and ``proxy status``."""

    @property
    @abstractmethod
    def allowed_paths(self) -> FrozenSet[str]:
        """Relative request paths this upstream accepts.

        Each path is relative to the proxy's ``/v1`` mount point, so
        ``"/chat/completions"`` matches a client request to
        ``http://127.0.0.1:<port>/v1/chat/completions``. A request whose
        path is not in the set is answered with a 404 and a helpful error
        body.
        """

    @abstractmethod
    def is_authenticated(self) -> bool:
        """Report whether usable credentials exist for this upstream.

        Expected to be cheap — no network calls. ``proxy start`` relies on
        it to give a clear error up front, before a port is bound.
        """

    @abstractmethod
    def get_credential(self) -> UpstreamCredential:
        """Produce a fresh credential, refreshing or minting one if needed.

        Implementations should:
          - refresh the access token if it's near expiry
          - mint/rotate the upstream bearer key if it's near expiry
          - persist any refreshed state back to disk

        Raises:
            RuntimeError: if the user isn't authenticated or the upstream
              refresh fails. The proxy will return 401 to the client.
        """

    def describe(self) -> str:
        """Return a single-line status summary for ``proxy status``."""
        try:
            credential = self.get_credential()
        except Exception as exc:  # pragma: no cover - defensive
            return f"{self.display_name}: not ready ({exc})"

        # The expiry suffix is only shown when the credential carries one.
        suffix = ""
        if credential.expires_at:
            suffix = f" (expires {credential.expires_at})"
        return f"{self.display_name}: {credential.base_url}{suffix}"
 __all__ = ["UpstreamAdapter", "UpstreamCredential"]
--- a/hermes_cli/proxy/adapters/nous_portal.py
+++ b/hermes_cli/proxy/adapters/nous_portal.py
@ -0,0 +1,137 @@
 """Nous Portal upstream adapter.
 Reads the user's Nous OAuth state from ``~/.hermes/auth.json``, refreshes
 the access token and mints a fresh agent key when needed, and exposes the
 upstream base URL plus minted bearer for the proxy server to forward to.
 The minted ``agent_key`` (not the OAuth ``access_token``) is what
 ``inference-api.nousresearch.com`` accepts as a bearer. The refresh helper
 already handles both — see :func:`hermes_cli.auth.refresh_nous_oauth_from_state`.
 """
 from __future__ import annotations
 import logging
 import threading
 from typing import Any, Dict, FrozenSet, Optional
 from hermes_cli.auth import (
    DEFAULT_NOUS_INFERENCE_URL,
    _load_auth_store,
    _save_auth_store,
    _write_shared_nous_state,
    refresh_nous_oauth_from_state,
 )
 from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
 logger = logging.getLogger(__name__)
 # Endpoints inference-api.nousresearch.com actually serves, written relative
 # to the proxy's ``/v1`` mount point. Anything else the proxy will reject
 # with 404 — keeps stray clients from leaking weird requests to the upstream.
 _ALLOWED_PATHS: FrozenSet[str] = frozenset(
    {
        "/chat/completions",
        "/completions",
        "/embeddings",
        "/models",
    }
 )
 class NousPortalAdapter(UpstreamAdapter):
    """Adapter that lets the proxy forward to the Nous Portal inference API."""

    def __init__(self) -> None:
        # Serializes the load → refresh → save sequence so parallel proxy
        # requests don't race each other refreshing expired tokens. The
        # refresh is an HTTP call, so the lock stays held across it.
        self._lock = threading.Lock()

    @property
    def name(self) -> str:
        return "nous"

    @property
    def display_name(self) -> str:
        return "Nous Portal"

    @property
    def allowed_paths(self) -> FrozenSet[str]:
        return _ALLOWED_PATHS

    def is_authenticated(self) -> bool:
        """Return True when the stored Nous state looks usable or recoverable."""
        stored = self._read_state()
        if stored is None:
            return False
        # An agent_key is enough on its own; failing that, a refresh_token
        # together with an access_token lets the refresh helper recover.
        if stored.get("agent_key"):
            return True
        return bool(stored.get("refresh_token") and stored.get("access_token"))

    def get_credential(self) -> UpstreamCredential:
        """Refresh the stored Nous state and return the resulting credential.

        Raises:
            RuntimeError: when no Nous state is stored, the refresh helper
                fails, or the refreshed state carries no ``agent_key``.
        """
        with self._lock:
            stored = self._read_state()
            if stored is None:
                raise RuntimeError(
                    "Not logged into Nous Portal. Run `hermes login nous` first."
                )

            try:
                fresh = refresh_nous_oauth_from_state(stored)
            except Exception as exc:
                raise RuntimeError(
                    f"Failed to refresh Nous Portal credentials: {exc}"
                ) from exc

            # Persist before validating, so whatever the helper refreshed is
            # written back even if no agent_key came out of it.
            self._save_state(fresh)

            token = fresh.get("agent_key")
            if not token:
                raise RuntimeError(
                    "Nous Portal refresh did not return a usable agent_key. "
                    "Try `hermes login nous` to re-authenticate."
                )

            upstream = fresh.get("inference_base_url") or DEFAULT_NOUS_INFERENCE_URL
            return UpstreamCredential(
                bearer=token,
                base_url=upstream.rstrip("/"),
                expires_at=fresh.get("agent_key_expires_at"),
            )

    # ------------------------------------------------------------------
    # Internal helpers — auth.json access. Kept local rather than added
    # to hermes_cli.auth to avoid expanding that module's public surface.
    # ------------------------------------------------------------------
    def _read_state(self) -> Optional[Dict[str, Any]]:
        """Return a copy of the stored ``nous`` provider state, or None."""
        try:
            store = _load_auth_store()
        except Exception as exc:
            logger.warning("proxy: failed to load auth store: %s", exc)
            return None

        entry = (store.get("providers") or {}).get("nous")
        if isinstance(entry, dict):
            # Hand back a copy so the refresh helper can mutate it freely.
            return dict(entry)
        return None

    def _save_state(self, state: Dict[str, Any]) -> None:
        """Write ``state`` back as the ``nous`` provider entry (best effort)."""
        try:
            store = _load_auth_store()
            store.setdefault("providers", {})["nous"] = state
            _save_auth_store(store)
            _write_shared_nous_state(state)
        except Exception as exc:
            # Best effort — the fresh credential is still returned. The next
            # request just won't see cached state, which means another refresh.
            logger.warning("proxy: failed to persist refreshed Nous state: %s", exc)
 __all__ = ["NousPortalAdapter"]
--- a/hermes_cli/proxy/cli.py
+++ b/hermes_cli/proxy/cli.py
@ -0,0 +1,141 @@
 """CLI handlers for the ``hermes proxy`` subcommand."""
 from __future__ import annotations
 import asyncio
 import logging
 import sys
 from typing import Any
 from hermes_cli.proxy.adapters import ADAPTERS, get_adapter
 from hermes_cli.proxy.server import (
    AIOHTTP_AVAILABLE,
    DEFAULT_HOST,
    DEFAULT_PORT,
    run_server,
 )
 logger = logging.getLogger(__name__)
 def _print_aiohttp_missing() -> None:
    print(
        "hermes proxy requires aiohttp. Install one of:\n"
        "  pip install 'hermes-agent[messaging]'\n"
        "  pip install aiohttp",
        file=sys.stderr,
    )
 def cmd_proxy_start(args: Any) -> int:
    """Run the proxy server in the foreground.

    Args:
        args: Parsed CLI namespace. ``provider``, ``host`` and ``port`` are
            read with ``getattr``; a missing or ``None`` value falls back to
            ``"nous"``, ``DEFAULT_HOST`` and ``DEFAULT_PORT`` respectively.

    Returns:
        Process exit code: 0 on clean shutdown (including Ctrl+C), 1 when
        aiohttp is missing or the server raises ``OSError`` (reported as a
        bind failure), 2 for an unknown provider or a missing login.
    """
    if not AIOHTTP_AVAILABLE:
        _print_aiohttp_missing()
        return 1
    provider = getattr(args, "provider", None) or "nous"
    try:
        adapter = get_adapter(provider)
    except ValueError as exc:
        print(f"Error: {exc}", file=sys.stderr)
        return 2
    # Fail early with a clear message instead of binding a port that could
    # only ever answer 401.
    if not adapter.is_authenticated():
        print(
            f"Not logged into {adapter.display_name}. "
            f"Run `hermes login {adapter.name}` first.",
            file=sys.stderr,
        )
        return 2
    host = getattr(args, "host", None) or DEFAULT_HOST
    # Substitute the default only when no port was given. A truthiness test
    # would silently rewrite an explicit ``--port 0`` (conventionally "let
    # the OS pick a free port") to DEFAULT_PORT.
    port = getattr(args, "port", None)
    if port is None:
        port = DEFAULT_PORT
    print(
        f"Starting Hermes proxy for {adapter.display_name}\n"
        f"  Listening on:  http://{host}:{port}/v1\n"
        f"  Forwarding to: (resolved per-request from your subscription)\n"
        f"  Use any bearer token in the client — the proxy attaches your real credential.\n"
        f"\n"
        f"Press Ctrl+C to stop.",
        file=sys.stderr,
    )
    try:
        asyncio.run(run_server(adapter, host=host, port=port))
    except KeyboardInterrupt:
        print("\nproxy: stopped", file=sys.stderr)
    except OSError as exc:
        print(f"proxy: failed to bind {host}:{port}: {exc}", file=sys.stderr)
        return 1
    return 0
 def cmd_proxy_status(args: Any) -> int:
    """Print one status line per registered upstream adapter; return 0.

    Each adapter is reported as not logged in, needing attention (credential
    resolution raised), or ready — with the bearer expiry when one is known.
    """
    print("Hermes proxy upstream adapters\n")
    for key in sorted(ADAPTERS):
        upstream = get_adapter(key)
        if not upstream.is_authenticated():
            line = f"  [{key:8s}] {upstream.display_name} — not logged in"
        else:
            try:
                cred = upstream.get_credential()
            except Exception as exc:
                line = (
                    f"  [{key:8s}] {upstream.display_name} — credentials need attention "
                    f"({exc})"
                )
            else:
                suffix = ""
                if cred.expires_at:
                    suffix = f" (bearer expires {cred.expires_at})"
                line = f"  [{key:8s}] {upstream.display_name} — ready{suffix}"
        print(line)
    print(
        "\nStart the proxy with: hermes proxy start [--provider <name>]"
    )
    return 0
 def cmd_proxy_list_providers(args: Any) -> int:
    """Print every registered provider key with its display name; return 0."""
    print("Available proxy upstream providers:")
    for key in sorted(ADAPTERS):
        print(f"  {key}  — {get_adapter(key).display_name}")
    return 0
 def cmd_proxy(args: Any) -> int:
    """Route ``hermes proxy <subcommand>`` to its handler.

    ``start`` and ``status`` go to their handlers; ``providers`` (or
    ``list``) goes to the provider listing. Anything else, including no
    subcommand at all, prints a short usage text to stderr and returns 0.
    """
    chosen = getattr(args, "proxy_command", None)
    if chosen in ("providers", "list"):
        return cmd_proxy_list_providers(args)
    if chosen == "status":
        return cmd_proxy_status(args)
    if chosen == "start":
        return cmd_proxy_start(args)

    # Fallthrough: no recognised subcommand, so show the short help.
    usage = (
        "hermes proxy — local OpenAI-compatible proxy that attaches your\n"
        "OAuth-authenticated provider credentials to outbound requests.\n"
        "\n"
        "Subcommands:\n"
        "  hermes proxy start [--provider nous] [--host 127.0.0.1] [--port 8645]\n"
        "      Run the proxy in the foreground.\n"
        "  hermes proxy status\n"
        "      Show which upstream adapters are ready.\n"
        "  hermes proxy providers\n"
        "      List available upstream providers.\n"
    )
    print(usage, file=sys.stderr)
    return 0
 __all__ = [
    "cmd_proxy",
    "cmd_proxy_start",
    "cmd_proxy_status",
    "cmd_proxy_list_providers",
 ]
--- a/hermes_cli/proxy/server.py
+++ b/hermes_cli/proxy/server.py
@ -0,0 +1,265 @@
 """HTTP server that forwards OpenAI-compatible requests to a configured upstream.
 Listens on ``http://<host>:<port>/v1/<path>`` and forwards each request to
 ``<upstream-base-url>/<path>`` with the client's ``Authorization`` header
 replaced by a freshly-resolved bearer from the configured adapter. The
 response is streamed back unmodified, preserving SSE.
 The server is intentionally minimal: it does NOT mediate, log, transform,
 or rewrite request/response bodies. It's a credential-attaching forwarder.
 """
 from __future__ import annotations
 import asyncio
 import json
 import logging
 import signal
 from typing import Optional
 # aiohttp is optional: when it is missing this module still imports, and
 # AIOHTTP_AVAILABLE lets create_app/run_server raise a clear RuntimeError
 # instead of failing on a None module.
 try:
    import aiohttp
    from aiohttp import web
    AIOHTTP_AVAILABLE = True
 except ImportError:
    aiohttp = None  # type: ignore[assignment]
    web = None  # type: ignore[assignment]
    AIOHTTP_AVAILABLE = False
 from hermes_cli.proxy.adapters.base import UpstreamAdapter
 logger = logging.getLogger(__name__)
 # Headers we strip when forwarding to the upstream. ``host``/``content-length``
 # are recomputed by aiohttp; ``authorization`` is replaced with our bearer.
 # Everything else (content-type, accept, user-agent, x-* headers) passes through.
 _HOP_BY_HOP_HEADERS = frozenset(
    {
        "host",
        "content-length",
        "connection",
        "keep-alive",
        "proxy-authenticate",
        "proxy-authorization",
        "te",
        "trailers",
        "transfer-encoding",
        "upgrade",
        "authorization",  # we replace this one
    }
 )
 # Bind defaults: used as run_server's parameter defaults and by the CLI when
 # ``hermes proxy start`` is given no --port / --host.
 DEFAULT_PORT = 8645
 DEFAULT_HOST = "127.0.0.1"
 def _json_error(status: int, message: str, code: str = "proxy_error") -> "web.Response":
    """Build an OpenAI-style JSON error response.

    ``code`` is reported under both the ``type`` and ``code`` keys of the
    ``error`` object.
    """
    error = {"message": message, "type": code, "code": code}
    return web.json_response({"error": error}, status=status)
 def _filter_request_headers(headers: "aiohttp.typedefs.LooseHeaders") -> dict:
    """Copy inbound request headers, leaving out hop-by-hop and auth headers.

    Names are compared case-insensitively against ``_HOP_BY_HOP_HEADERS``;
    surviving headers keep their original spelling.
    """
    return {
        name: value
        for name, value in headers.items()
        if name.lower() not in _HOP_BY_HOP_HEADERS
    }
 def _filter_response_headers(headers) -> dict:
    """Copy upstream response headers, leaving out the ones we must not relay.

    Drops everything in ``_HOP_BY_HOP_HEADERS`` plus ``Content-Encoding`` and
    ``Content-Length`` (compared case-insensitively).
    """
    # Content-Encoding/Content-Length are left for aiohttp to set on the
    # streamed response rather than being copied from the upstream.
    left_to_aiohttp = ("content-encoding", "content-length")
    return {
        name: value
        for name, value in headers.items()
        if name.lower() not in _HOP_BY_HOP_HEADERS
        and name.lower() not in left_to_aiohttp
    }
 def create_app(adapter: UpstreamAdapter) -> "web.Application":
    """Build the aiohttp application bound to a specific upstream adapter.

    Routes:
      - ``GET /health`` — local status JSON; never contacts the upstream.
      - ``* /v1/{tail}`` — forwarded to the upstream when ``/{tail}`` is in
        ``adapter.allowed_paths``; otherwise answered with a 404 JSON error.

    Args:
        adapter: Upstream adapter that supplies the allowed paths and a
            credential for every forwarded request.

    Returns:
        The configured ``web.Application``.

    Raises:
        RuntimeError: if aiohttp is not installed.
    """
    if not AIOHTTP_AVAILABLE:
        raise RuntimeError(
            "aiohttp is required for `hermes proxy`. Install with: "
            "pip install 'hermes-agent[messaging]' or `pip install aiohttp`."
        )
    app = web.Application()
    # AppKey ensures forward-compat with future aiohttp versions that strip
    # bare-string keys.
    _adapter_key = web.AppKey("adapter", UpstreamAdapter)
    app[_adapter_key] = adapter

    async def handle_health(request: "web.Request") -> "web.Response":
        """Report proxy liveness and whether the adapter has credentials."""
        return web.json_response(
            {
                "status": "ok",
                "upstream": adapter.display_name,
                "authenticated": adapter.is_authenticated(),
            }
        )

    async def handle_proxy(request: "web.Request") -> "web.StreamResponse":
        """Forward one request to the upstream and stream the answer back."""
        # Extract the path *after* /v1
        rel_path = "/" + request.match_info.get("tail", "").lstrip("/")
        if rel_path not in adapter.allowed_paths:
            allowed = ", ".join(sorted(adapter.allowed_paths))
            return _json_error(
                404,
                f"Path /v1{rel_path} is not forwarded by this proxy. "
                f"Allowed: {allowed}",
                code="path_not_allowed",
            )
        try:
            # get_credential() is synchronous and may refresh credentials
            # (lock + network round-trip), so run it in the default executor
            # rather than stalling every other in-flight request on the loop.
            cred = await asyncio.get_running_loop().run_in_executor(
                None, adapter.get_credential
            )
        except Exception as exc:
            logger.warning("proxy: credential resolution failed: %s", exc)
            return _json_error(401, str(exc), code="upstream_auth_failed")
        upstream_url = f"{cred.base_url.rstrip('/')}{rel_path}"
        # Preserve query string verbatim.
        if request.query_string:
            upstream_url = f"{upstream_url}?{request.query_string}"
        # Forward body verbatim. Read into memory once — request bodies for
        # chat/completions/embeddings are small (<1MB typically). If we ever
        # need to forward large multipart uploads we'll switch to streaming
        # the request body too.
        body = await request.read()
        fwd_headers = _filter_request_headers(request.headers)
        fwd_headers["Authorization"] = f"{cred.token_type} {cred.bearer}"
        logger.debug(
            "proxy: forwarding %s %s -> %s (body=%d bytes)",
            request.method, rel_path, upstream_url, len(body),
        )
        # Use a per-request session so connection state doesn't leak between
        # clients. Could be optimized to a shared session later.
        timeout = aiohttp.ClientTimeout(total=None, sock_connect=15, sock_read=300)
        try:
            session = aiohttp.ClientSession(timeout=timeout)
        except Exception as exc:  # pragma: no cover - aiohttp setup issue
            return _json_error(500, f"proxy session init failed: {exc}")

        resp = None
        try:
            # Both context managers guarantee the upstream response is
            # released and the session closed on every exit path, including
            # a failing prepare()/write() and handler cancellation.
            async with session, session.request(
                request.method,
                upstream_url,
                data=body if body else None,
                headers=fwd_headers,
                allow_redirects=False,
            ) as upstream_resp:
                # Stream response back. Headers first, then chunked body.
                resp = web.StreamResponse(
                    status=upstream_resp.status,
                    headers=_filter_response_headers(upstream_resp.headers),
                )
                await resp.prepare(request)
                async for chunk in upstream_resp.content.iter_any():
                    if chunk:
                        await resp.write(chunk)
        except aiohttp.ClientError as exc:
            if resp is None or not resp.prepared:
                # Nothing has been sent yet, so a proper error can be returned.
                logger.warning("proxy: upstream connection failed: %s", exc)
                return _json_error(502, f"upstream connection failed: {exc}",
                                   code="upstream_unreachable")
            # Headers are already on the wire; all we can do is stop.
            logger.warning("proxy: streaming interrupted: %s", exc)
        except asyncio.TimeoutError:
            if resp is None or not resp.prepared:
                return _json_error(504, "upstream request timed out",
                                   code="upstream_timeout")
            logger.warning("proxy: streaming interrupted: upstream timed out")
        # asyncio.CancelledError is deliberately not caught: it propagates
        # (after the cleanup above) so the cancellation is honoured.
        await resp.write_eof()
        return resp

    # /health doesn't go through the upstream
    app.router.add_get("/health", handle_health)
    # Catch-all under /v1 — forwards if the path is allowed.
    app.router.add_route("*", "/v1/{tail:.*}", handle_proxy)
    return app
 async def run_server(
    adapter: UpstreamAdapter,
    host: str = DEFAULT_HOST,
    port: int = DEFAULT_PORT,
    shutdown_event: Optional[asyncio.Event] = None,
 ) -> None:
    """Run the proxy in the current event loop until shutdown_event is set.

    If shutdown_event is None, runs until cancelled (Ctrl+C or SIGTERM).

    Args:
        adapter: Upstream adapter the application forwards to.
        host: Address to bind.
        port: TCP port to bind.
        shutdown_event: Optional event that stops the server when set. When
            omitted, an internal event is created and wired to SIGINT and
            SIGTERM where the loop supports signal handlers.

    Raises:
        RuntimeError: if aiohttp is not installed.
        OSError: if the listening socket cannot be bound; the runner is
            cleaned up before the error propagates.
    """
    if not AIOHTTP_AVAILABLE:
        raise RuntimeError(
            "aiohttp is required for `hermes proxy`. Install with: "
            "pip install 'hermes-agent[messaging]' or `pip install aiohttp`."
        )
    app = create_app(adapter)
    runner = web.AppRunner(app, access_log=None)
    await runner.setup()
    listening = False
    # Everything after setup() sits inside try/finally so the runner is
    # cleaned up even when binding fails (e.g. the port is already in use).
    try:
        site = web.TCPSite(runner, host=host, port=port)
        await site.start()
        listening = True
        logger.info(
            "proxy: listening on http://%s:%d/v1 -> %s",
            host, port, adapter.display_name,
        )
        stop_event = asyncio.Event() if shutdown_event is None else shutdown_event
        # Wire signal handlers when we own the loop's lifetime.
        if shutdown_event is None:
            loop = asyncio.get_running_loop()
            for sig in (signal.SIGINT, signal.SIGTERM):
                try:
                    loop.add_signal_handler(sig, stop_event.set)
                except NotImplementedError:
                    # Windows / restricted environments — Ctrl+C will still
                    # raise KeyboardInterrupt and unwind us.
                    pass
        await stop_event.wait()
    finally:
        if listening:
            logger.info("proxy: shutting down")
        await runner.cleanup()
 __all__ = [
    "create_app",
    "run_server",
    "DEFAULT_HOST",
    "DEFAULT_PORT",
    "AIOHTTP_AVAILABLE",
 ]
--- a/tests/hermes_cli/test_proxy.py
+++ b/tests/hermes_cli/test_proxy.py
@ -0,0 +1,512 @@
 """Tests for the `hermes proxy` subcommand and its upstream adapters."""
 from __future__ import annotations
 import asyncio
 import json
 import os
 import threading
 from pathlib import Path
 from typing import Any, Dict
 from unittest.mock import MagicMock, patch
 import pytest
 from hermes_cli.proxy.adapters import ADAPTERS, get_adapter
 from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
 from hermes_cli.proxy.adapters.nous_portal import NousPortalAdapter
 # ---------------------------------------------------------------------------
 # Adapter registry
 # ---------------------------------------------------------------------------
 def test_registry_lists_nous():
    """The adapter registry contains the ``nous`` key."""
    registered = ADAPTERS
    assert "nous" in registered
 def test_get_adapter_returns_instance():
    """``get_adapter("nous")`` returns a NousPortalAdapter that is also an UpstreamAdapter."""
    resolved = get_adapter("nous")
    for expected_type in (NousPortalAdapter, UpstreamAdapter):
        assert isinstance(resolved, expected_type)
 def test_get_adapter_case_insensitive():
    """Provider lookup tolerates upper-case and surrounding whitespace."""
    for spelling in ("NOUS", "  Nous  "):
        assert isinstance(get_adapter(spelling), NousPortalAdapter)
 def test_get_adapter_unknown_provider_raises():
    """An unregistered provider name raises ValueError mentioning that name."""
    unknown_provider = "anthropic"  # not yet implemented
    with pytest.raises(ValueError, match="anthropic"):
        get_adapter(unknown_provider)
 # ---------------------------------------------------------------------------
 # NousPortalAdapter
 # ---------------------------------------------------------------------------
 def _write_auth_store(hermes_home: Path, nous_state: Dict[str, Any]) -> Path:
    """Create ``auth.json`` under *hermes_home* with *nous_state* as the ``nous`` provider entry.

    Returns the path of the file that was written.
    """
    payload = {"version": 1, "providers": {"nous": nous_state}}
    target = hermes_home / "auth.json"
    target.write_text(json.dumps(payload))
    return target
 def test_nous_adapter_metadata():
    """The Nous adapter reports its name, display name and the four allowed paths."""
    portal = NousPortalAdapter()
    assert portal.name == "nous"
    assert portal.display_name == "Nous Portal"
    for required_path in ("/chat/completions", "/embeddings", "/completions", "/models"):
        assert required_path in portal.allowed_paths
 def test_nous_adapter_not_authenticated_when_no_auth_file(tmp_path, monkeypatch):
    """With no auth.json in HERMES_HOME the adapter reports unauthenticated."""
    # conftest already sets HERMES_HOME; pin it to this empty directory anyway.
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    portal = NousPortalAdapter()
    authenticated = portal.is_authenticated()
    assert not authenticated
 def test_nous_adapter_not_authenticated_when_provider_missing(tmp_path, monkeypatch):
    """An auth.json with an empty providers map counts as unauthenticated."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    empty_store = {"version": 1, "providers": {}}
    store_path = tmp_path / "auth.json"
    store_path.write_text(json.dumps(empty_store))
    assert not NousPortalAdapter().is_authenticated()
 def test_nous_adapter_authenticated_with_agent_key(tmp_path, monkeypatch):
    """A stored agent_key with an expiry and base URL counts as authenticated."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    stored_state = {
        "agent_key": "ov-test-key",
        "agent_key_expires_at": "2099-01-01T00:00:00Z",
        "inference_base_url": "https://inference-api.nousresearch.com/v1",
    }
    _write_auth_store(tmp_path, stored_state)
    assert NousPortalAdapter().is_authenticated()
 def test_nous_adapter_authenticated_with_refresh_token_only(tmp_path, monkeypatch):
    """Access + refresh tokens without an agent_key still count as authenticated."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    tokens_only = {"access_token": "access-tok", "refresh_token": "refresh-tok"}
    _write_auth_store(tmp_path, tokens_only)
    portal = NousPortalAdapter()
    assert portal.is_authenticated()
 def test_nous_adapter_get_credential_refreshes_and_persists(tmp_path, monkeypatch):
    """get_credential() calls the refresh helper once, returns its key, and writes it to auth.json."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    initial_state = {
        "access_token": "access-tok",
        "refresh_token": "refresh-tok",
        "client_id": "hermes-cli",
        "portal_base_url": "https://portal.nousresearch.com",
        "inference_base_url": "https://inference-api.nousresearch.com/v1",
    }
    _write_auth_store(tmp_path, initial_state)
    # What the (patched) refresh helper hands back: the same state plus a minted key.
    refreshed_state = {
        **initial_state,
        "agent_key": "minted-bearer",
        "agent_key_expires_at": "2099-01-01T00:00:00Z",
    }
    refresh_target = "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state"
    with patch(refresh_target, return_value=refreshed_state) as refresh_mock:
        portal = NousPortalAdapter()
        credential = portal.get_credential()
    refresh_mock.assert_called_once()
    assert credential.bearer == "minted-bearer"
    assert credential.base_url == "https://inference-api.nousresearch.com/v1"
    assert credential.expires_at == "2099-01-01T00:00:00Z"
    assert credential.token_type == "Bearer"
    # The refreshed state must have been written back to disk.
    on_disk = json.loads((tmp_path / "auth.json").read_text())
    assert on_disk["providers"]["nous"]["agent_key"] == "minted-bearer"
 def test_nous_adapter_get_credential_raises_when_not_logged_in(tmp_path, monkeypatch):
    """Without stored credentials, get_credential() raises and points at ``hermes login nous``."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    portal = NousPortalAdapter()
    with pytest.raises(RuntimeError, match="hermes login nous"):
        portal.get_credential()
 def test_nous_adapter_get_credential_raises_on_refresh_failure(tmp_path, monkeypatch):
    """A RuntimeError from the refresh helper surfaces from get_credential() with its message."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    tokens_only = {"access_token": "access-tok", "refresh_token": "refresh-tok"}
    _write_auth_store(tmp_path, tokens_only)
    refresh_target = "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state"
    revoked = RuntimeError("Refresh session has been revoked")
    with patch(refresh_target, side_effect=revoked):
        portal = NousPortalAdapter()
        with pytest.raises(RuntimeError, match="Refresh session has been revoked"):
            portal.get_credential()
 def test_nous_adapter_get_credential_raises_when_no_agent_key_returned(tmp_path, monkeypatch):
    """A refresh result lacking agent_key produces a clear RuntimeError."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    tokens_only = {"access_token": "access-tok", "refresh_token": "refresh-tok"}
    _write_auth_store(tmp_path, tokens_only)
    refresh_target = "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state"
    keyless_result = {"access_token": "a", "refresh_token": "r"}
    with patch(refresh_target, return_value=keyless_result):
        portal = NousPortalAdapter()
        with pytest.raises(RuntimeError, match="did not return a usable agent_key"):
            portal.get_credential()
 def test_nous_adapter_concurrent_refresh_serialized(tmp_path, monkeypatch):
    """Parallel get_credential() calls must not overlap inside the refresh helper.

    Three worker threads share one adapter. The patched refresh helper
    records each call and flags any entry made while another call is still
    in flight; the test fails if such an overlap is observed.
    """
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    _write_auth_store(tmp_path, {
        "access_token": "a", "refresh_token": "r",
    })
    # Thread idents of every refresh call, in call order.
    call_log: list = []
    # Set while some thread is inside the fake refresh helper.
    in_flight = threading.Event()
    # Set if a thread enters the helper while in_flight is already set.
    overlap_detected = threading.Event()
    # One-element list so the nested function can mutate the count.
    counter = [0]
    counter_lock = threading.Lock()
    def serializing_refresh(state, **kwargs):
        # If another thread is already inside refresh, the lock is broken.
        if in_flight.is_set():
            overlap_detected.set()
        in_flight.set()
        try:
            call_log.append(threading.current_thread().ident)
            # Simulate refresh latency so any race window is exposed.
            import time
            time.sleep(0.05)
            with counter_lock:
                counter[0] += 1
                idx = counter[0]
            # Return the incoming state extended with a per-call key.
            return {
                **state,
                "agent_key": f"key-{idx}",
                "agent_key_expires_at": "2099-01-01T00:00:00Z",
                "inference_base_url": "https://inference-api.nousresearch.com/v1",
            }
        finally:
            in_flight.clear()
    adapter = NousPortalAdapter()
    results: list = []
    errors: list = []
    def worker():
        # Collect either the bearer or the exception; never let a thread die silently.
        try:
            results.append(adapter.get_credential().bearer)
        except Exception as exc:  # pragma: no cover - shouldn't happen
            errors.append(exc)
    with patch(
        "hermes_cli.proxy.adapters.nous_portal.refresh_nous_oauth_from_state",
        side_effect=serializing_refresh,
    ):
        threads = [threading.Thread(target=worker) for _ in range(3)]
        for t in threads:
            t.start()
        # Join inside the patch so every worker sees the fake helper.
        for t in threads:
            t.join()
    assert not errors, f"workers errored: {errors}"
    assert len(results) == 3
    assert len(call_log) == 3
    assert not overlap_detected.is_set(), "refresh calls overlapped — lock is broken"
    assert all(r.startswith("key-") for r in results)
 # ---------------------------------------------------------------------------
 # Server: path filtering + forwarding
 #
 # We run the proxy AND a fake upstream as real aiohttp servers on ephemeral
 # ports. Avoids pytest-aiohttp's fixtures (extra dependency for one test file).
 # ---------------------------------------------------------------------------
 aiohttp = pytest.importorskip("aiohttp")
 from aiohttp import web  # noqa: E402
 from hermes_cli.proxy.server import create_app  # noqa: E402
 class FakeAdapter(UpstreamAdapter):
    """In-memory adapter for server tests: hands out a fixed credential, never reads disk.

    ``calls`` counts how many times ``get_credential`` was invoked.
    """
    def __init__(self, base_url: str, bearer: str = "test-bearer",
                 allowed=None, raise_on_credential=False):
        self._base_url = base_url
        self._bearer = bearer
        # Falls back to the chat-completions path when no allow-list is given.
        self._allowed = frozenset(allowed or ["/chat/completions"])
        self._raise = raise_on_credential
        self.calls = 0
    @property
    def name(self):
        return "fake"
    @property
    def display_name(self):
        return "Fake Provider"
    @property
    def allowed_paths(self):
        return self._allowed
    def is_authenticated(self):
        return True
    def get_credential(self):
        """Return the fixed credential, or raise when built with ``raise_on_credential=True``."""
        self.calls += 1
        if self._raise:
            raise RuntimeError("simulated auth failure")
        credential = UpstreamCredential(
            bearer=self._bearer,
            base_url=self._base_url,
            expires_at="2099-01-01T00:00:00Z",
        )
        return credential
 async def _start_runner(app: "web.Application"):
    """Spin up an aiohttp app on an ephemeral localhost port.

    Returns:
        ``(runner, base_url)``: the started ``AppRunner`` and the
        ``http://127.0.0.1:<port>`` URL it is serving on. The caller is
        responsible for ``await runner.cleanup()``.
    """
    runner = web.AppRunner(app, access_log=None)
    await runner.setup()
    site = web.TCPSite(runner, host="127.0.0.1", port=0)
    await site.start()
    # port=0 lets the OS choose. Read the bound port back through the public
    # AppRunner.addresses property (a list of getsockname() tuples) instead
    # of reaching into the private site._server attribute.
    port = runner.addresses[0][1]
    return runner, f"http://127.0.0.1:{port}"
 def _build_fake_upstream(captured: Dict[str, Any]) -> "web.Application":
    """Build a stand-in upstream app.

    ``/v1/chat/completions`` and ``/v1/embeddings`` record each request into
    ``captured["requests"]`` and answer with a small JSON echo; ``/v1/sse``
    streams three fixed server-sent-event chunks.
    """
    async def record_and_echo(request):
        raw = await request.read()
        if raw:
            text = raw.decode("utf-8")
        else:
            text = ""
        entry = {
            "method": request.method,
            "path": request.path,
            "auth": request.headers.get("Authorization"),
            "body": text,
        }
        captured["requests"].append(entry)
        return web.json_response({"echoed": True, "path": request.path})
    async def stream_events(request):
        stream = web.StreamResponse(
            status=200, headers={"Content-Type": "text/event-stream"},
        )
        await stream.prepare(request)
        events = (b"data: hello\n\n", b"data: world\n\n", b"data: [DONE]\n\n")
        for event in events:
            await stream.write(event)
        await stream.write_eof()
        return stream
    upstream = web.Application()
    for echo_path in ("/v1/chat/completions", "/v1/embeddings"):
        upstream.router.add_route("*", echo_path, record_and_echo)
    upstream.router.add_route("*", "/v1/sse", stream_events)
    return upstream
 def test_server_forwards_chat_completions():
    """A chat-completions POST reaches the upstream with the adapter's bearer and the client's body."""
    async def scenario():
        seen: Dict[str, Any] = {"requests": []}
        upstream_runner, upstream_url = await _start_runner(_build_fake_upstream(seen))
        fake = FakeAdapter(f"{upstream_url}/v1", bearer="real-portal-key")
        proxy_runner, proxy_url = await _start_runner(create_app(fake))
        payload = {
            "model": "Hermes-4-70B",
            "messages": [{"role": "user", "content": "hi"}],
        }
        client_headers = {"Authorization": "Bearer client-dummy-key"}
        try:
            async with aiohttp.ClientSession() as client:
                async with client.post(
                    f"{proxy_url}/v1/chat/completions",
                    json=payload,
                    headers=client_headers,
                ) as reply:
                    assert reply.status == 200
                    decoded = await reply.json()
                    assert decoded["echoed"] is True
            assert len(seen["requests"]) == 1
            forwarded = seen["requests"][0]
            assert forwarded["auth"] == "Bearer real-portal-key"
            assert "Hermes-4-70B" in forwarded["body"]
        finally:
            await proxy_runner.cleanup()
            await upstream_runner.cleanup()
    asyncio.run(scenario())
 def test_server_rejects_disallowed_path():
    """A path outside the allow-list yields 404 ``path_not_allowed`` whose message names the allowed path."""
    async def scenario():
        fake = FakeAdapter("http://unused.example/v1", allowed=["/chat/completions"])
        proxy_runner, proxy_url = await _start_runner(create_app(fake))
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(f"{proxy_url}/v1/random/endpoint") as reply:
                    assert reply.status == 404
                    decoded = await reply.json()
                    assert decoded["error"]["type"] == "path_not_allowed"
                    assert "/chat/completions" in decoded["error"]["message"]
        finally:
            await proxy_runner.cleanup()
    asyncio.run(scenario())
 def test_server_returns_401_when_adapter_fails():
    """When get_credential() raises, the proxy answers 401 ``upstream_auth_failed`` with the error text."""
    async def scenario():
        failing = FakeAdapter("http://unused.example/v1", raise_on_credential=True)
        proxy_runner, proxy_url = await _start_runner(create_app(failing))
        try:
            async with aiohttp.ClientSession() as client:
                async with client.post(f"{proxy_url}/v1/chat/completions", json={}) as reply:
                    assert reply.status == 401
                    decoded = await reply.json()
                    assert decoded["error"]["type"] == "upstream_auth_failed"
                    assert "simulated auth failure" in decoded["error"]["message"]
        finally:
            await proxy_runner.cleanup()
    asyncio.run(scenario())
 def test_server_health_endpoint():
    """GET /health reports status, upstream display name and authentication state."""
    async def scenario():
        fake = FakeAdapter("http://unused.example/v1")
        proxy_runner, proxy_url = await _start_runner(create_app(fake))
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(f"{proxy_url}/health") as reply:
                    assert reply.status == 200
                    health = await reply.json()
                    assert health["status"] == "ok"
                    assert health["upstream"] == "Fake Provider"
                    assert health["authenticated"] is True
        finally:
            await proxy_runner.cleanup()
    asyncio.run(scenario())
 def test_server_streams_sse():
    """Server-sent events from the upstream arrive at the client through the proxy."""
    async def scenario():
        seen: Dict[str, Any] = {"requests": []}
        upstream_runner, upstream_url = await _start_runner(_build_fake_upstream(seen))
        fake = FakeAdapter(f"{upstream_url}/v1", allowed=["/sse"])
        proxy_runner, proxy_url = await _start_runner(create_app(fake))
        try:
            async with aiohttp.ClientSession() as client:
                async with client.get(f"{proxy_url}/v1/sse") as reply:
                    assert reply.status == 200
                    pieces = [piece async for piece in reply.content.iter_any()]
                    stream_bytes = b"".join(pieces)
                    assert b"data: hello" in stream_bytes
                    assert b"data: [DONE]" in stream_bytes
        finally:
            await proxy_runner.cleanup()
            await upstream_runner.cleanup()
    asyncio.run(scenario())
 def test_server_strips_client_auth_header():
    """The client's Authorization header MUST NOT reach the upstream."""
    async def scenario():
        seen: Dict[str, Any] = {"requests": []}
        upstream_runner, upstream_url = await _start_runner(_build_fake_upstream(seen))
        fake = FakeAdapter(f"{upstream_url}/v1", bearer="ours")
        proxy_runner, proxy_url = await _start_runner(create_app(fake))
        leaky_headers = {"Authorization": "Bearer SHOULD_NOT_LEAK"}
        try:
            async with aiohttp.ClientSession() as client:
                async with client.post(
                    f"{proxy_url}/v1/chat/completions",
                    json={},
                    headers=leaky_headers,
                ) as reply:
                    # Drain the body so the exchange completes before we inspect the capture.
                    await reply.read()
            assert seen["requests"][0]["auth"] == "Bearer ours"
            assert "SHOULD_NOT_LEAK" not in seen["requests"][0]["auth"]
        finally:
            await proxy_runner.cleanup()
            await upstream_runner.cleanup()
    asyncio.run(scenario())
 # ---------------------------------------------------------------------------
 # CLI handlers
 # ---------------------------------------------------------------------------
 def test_cmd_proxy_status_runs(capsys, tmp_path, monkeypatch):
    """``proxy status`` exits 0 and lists the nous adapter as not logged in for an empty HERMES_HOME."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from hermes_cli.proxy.cli import cmd_proxy_status
    exit_code = cmd_proxy_status(MagicMock())
    assert exit_code == 0
    printed = capsys.readouterr().out
    for expected in ("nous", "Nous Portal", "not logged in"):
        assert expected in printed
 def test_cmd_proxy_providers_runs(capsys):
    """``proxy providers`` exits 0 and prints the nous adapter with its display name."""
    from hermes_cli.proxy.cli import cmd_proxy_list_providers
    exit_code = cmd_proxy_list_providers(MagicMock())
    assert exit_code == 0
    printed = capsys.readouterr().out
    for expected in ("nous", "Nous Portal"):
        assert expected in printed
 def test_cmd_proxy_start_refuses_unknown_provider(capsys):
    """``proxy start`` with an unregistered provider returns 2 and names the provider on stderr."""
    from hermes_cli.proxy.cli import cmd_proxy_start
    cli_args = MagicMock(provider="no-such-provider", host=None, port=None)
    exit_code = cmd_proxy_start(cli_args)
    assert exit_code == 2
    stderr_text = capsys.readouterr().err
    assert "no-such-provider" in stderr_text
 def test_cmd_proxy_start_refuses_when_unauthenticated(capsys, tmp_path, monkeypatch):
    """``proxy start`` without stored credentials returns 2 and points at ``hermes login nous``."""
    monkeypatch.setenv("HERMES_HOME", str(tmp_path))
    from hermes_cli.proxy.cli import cmd_proxy_start
    cli_args = MagicMock(provider="nous", host=None, port=None)
    exit_code = cmd_proxy_start(cli_args)
    assert exit_code == 2
    stderr_text = capsys.readouterr().err
    assert "hermes login nous" in stderr_text
--- a/website/docs/reference/cli-commands.md
+++ b/website/docs/reference/cli-commands.md
@ -40,6 +40,7 @@ hermes [global-options] <command> [subcommand/options]
 | `hermes model` | Interactively choose the default provider and model. |
 | `hermes fallback` | Manage fallback providers tried when the primary model errors. |
 | `hermes gateway` | Run or manage the messaging gateway service. |
 | `hermes proxy` | Local OpenAI-compatible proxy that attaches OAuth provider credentials. See [Subscription Proxy](../user-guide/features/subscription-proxy.md). |
 | `hermes lsp` | Manage Language Server Protocol integration (semantic diagnostics for write_file/patch). |
 | `hermes setup` | Interactive setup wizard for all or part of the configuration. |
 | `hermes whatsapp` | Configure and pair the WhatsApp bridge. |
--- a/website/docs/user-guide/features/subscription-proxy.md
+++ b/website/docs/user-guide/features/subscription-proxy.md
@ -0,0 +1,203 @@
 ---
 sidebar_position: 15
 title: "Subscription Proxy"
 description: "Use your Nous Portal subscription (or other OAuth provider) as an OpenAI-compatible endpoint for external apps"
 ---
 # Subscription Proxy
 The subscription proxy is a local HTTP server that lets external apps —
 OpenViking, Karakeep, Open WebUI, anything that speaks OpenAI-compatible
 chat completions — use your Hermes-managed provider subscription as their
 LLM endpoint. The proxy attaches the right credentials (refreshing them
 automatically) so the app never needs a static API key.
 This is different from the [API server](./api-server.md):
 | | API server | Subscription proxy |
 |---|---|---|
 | What it serves | Your agent (full toolset, memory, skills) | Raw model inference |
 | Use case | "Use Hermes as a chat backend" | "Use my Portal sub from another app" |
 | Auth | Your `API_SERVER_KEY` | Any bearer (proxy attaches the real one) |
 | Tool calls | Yes — the agent runs tools | No — passthrough only |
 Use the API server when you want the **agent** as a backend. Use the
 proxy when you just want **the model** through your subscription.
 ## Quick Start
 ### 1. Log into your provider (one-time)
 ```bash
 hermes login nous
 ```
 This opens your browser for the Nous Portal OAuth flow. Hermes stores
 the refresh token in `~/.hermes/auth.json` — the same place all Hermes
 provider logins live.
 ### 2. Start the proxy
 ```bash
 hermes proxy start
 ```
 ```
 Starting Hermes proxy for Nous Portal
  Listening on:  http://127.0.0.1:8645/v1
  Forwarding to: (resolved per-request from your subscription)
  Use any bearer token in the client — the proxy attaches your real credential.
 ```
 Leave this running in the foreground. Use `tmux`, `nohup`, or a systemd
 unit if you want it to survive logout.
 ### 3. Point your app at it
 Any OpenAI-compatible app config takes the same triple:
 ```
 Base URL:   http://127.0.0.1:8645/v1
 API key:    anything (e.g. "sk-unused")
 Model:      Hermes-4-70B    # or Hermes-4.3-36B, Hermes-4-405B
 ```
 The proxy ignores the `Authorization` header from your app and attaches
 your real Portal credential to the upstream request. Refreshes happen
 automatically when the bearer approaches expiry.
 ## Available providers
 ```bash
 hermes proxy providers
 ```
 Currently shipped: `nous` (Nous Portal). More OAuth providers can be
 added by implementing the `UpstreamAdapter` interface in
 `hermes_cli/proxy/adapters/`.
 ## Check status
 ```bash
 hermes proxy status
 ```
 ```
 Hermes proxy upstream adapters
  [nous    ] Nous Portal — ready (bearer expires 2026-05-15T06:43:21Z)
 ```
 If you see `not logged in`, run `hermes login nous`. If you see
 `credentials need attention`, your refresh token was revoked. This is
 rare — it happens if you signed out from the Portal web UI. Re-run
 `hermes login nous` to fix it.
 ## Allowed paths
 The proxy only forwards paths the upstream actually serves. For Nous
 Portal:
 | Path | Purpose |
 |------|---------|
 | `/v1/chat/completions` | Chat completions (streaming + non-streaming) |
 | `/v1/completions` | Legacy text completions |
 | `/v1/embeddings` | Embeddings |
 | `/v1/models` | Model list |
 Other paths (`/v1/images/generations`, `/v1/audio/speech`, etc.) return
 404 with a clear error listing the allowed paths. This keeps stray
 clients from sending unsupported requests to the upstream.
 ## Configuring OpenViking to use Portal
 [OpenViking](https://github.com/volcengine/OpenViking) is a context
 database that needs an LLM provider for its VLM (vision/language model
 used to extract memories) and embedding model. With the proxy, you can
 point its `vlm.api_base` at your local proxy:
 Edit `~/.openviking/ov.conf`:
 ```json
 {
  "vlm": {
    "provider": "openai",
    "model": "Hermes-4-70B",
    "api_base": "http://127.0.0.1:8645/v1",
    "api_key": "unused-proxy-attaches-real-creds"
  }
 }
 ```
 Then start your proxy in a terminal alongside `openviking-server`:
 ```bash
 # Terminal 1
 hermes proxy start
 # Terminal 2
 openviking-server
 ```
 OpenViking's VLM calls now flow through your Portal subscription. The
 embedding model side still needs its own provider. Portal does serve
 `/v1/embeddings`, but which models you can select depends on what your
 tier supports; check `portal.nousresearch.com/models`.
 ## Configuring Karakeep (or any bookmark/summarizer app)
 [Karakeep](https://karakeep.app/) takes an OpenAI-compatible API for
 bookmark summarization. In its config:
 ```bash
 # Karakeep .env
 OPENAI_API_BASE_URL=http://127.0.0.1:8645/v1
 OPENAI_API_KEY=any-non-empty-string
 INFERENCE_TEXT_MODEL=Hermes-4-70B
 ```
 Same pattern works for Open WebUI, LobeChat, NextChat, or any other
 OpenAI-compatible client.
 ## Exposing on LAN
 By default the proxy binds `127.0.0.1` (localhost only). To let other
 machines on your network use it:
 ```bash
 hermes proxy start --host 0.0.0.0 --port 8645
 ```
 ⚠ **Be aware:** anyone on your network can now use your Portal
 subscription. The proxy has no auth of its own — it accepts any bearer.
 Use a firewall, VPN, or reverse proxy with proper auth if you expose
 this beyond your trusted network.
 ## Rate limits
 Your Portal tier's RPM/TPM limits apply across the whole proxy. The
 proxy doesn't fan out or pool — it's a single bearer with your full
 subscription quota. Monitor usage at
 [portal.nousresearch.com](https://portal.nousresearch.com).
 ## Architecture
 The proxy is intentionally minimal. Per request:
 1. Receive `POST /v1/chat/completions` from your app
 2. Look up the adapter's current credential (refresh if expiring)
 3. Forward the request body verbatim, with `Authorization: Bearer <minted-key>`
 4. Stream the response back unchanged (SSE preserved)
 No transformation. No logging of request bodies. No agent loop. The
 proxy is a credential-attaching pass-through.
 ## Future: more OAuth providers
 The adapter system is pluggable. Adding a new provider (e.g.
 HuggingFace, GitHub Copilot's chat endpoint, Anthropic via OAuth)
 requires implementing `UpstreamAdapter` in
 `hermes_cli/proxy/adapters/<provider>.py` and registering it in
 `adapters/__init__.py`. Providers that aren't OpenAI-compatible at the
 protocol level (Anthropic Messages API, for example) would need a
 transformation layer, which is out of scope for the current shape.
--- a/website/sidebars.ts
+++ b/website/sidebars.ts
@ -96,6 +96,7 @@ const sidebars: SidebarsConfig = {
          items: [
            'user-guide/features/web-dashboard',
            'user-guide/features/extending-the-dashboard',
            'user-guide/features/subscription-proxy',
          ],
        },
        {