refactor(auth): collapse Nous inference fallback controls

This commit is contained in:
Robin Fernandes 2026-05-17 20:34:39 +10:00 committed by Teknium
parent 89a3d038cf
commit 0bac7dd05b
13 changed files with 1071 additions and 240 deletions

View file

@ -26,7 +26,7 @@ except ImportError:
web = None # type: ignore[assignment]
AIOHTTP_AVAILABLE = False
from hermes_cli.proxy.adapters.base import UpstreamAdapter
from hermes_cli.proxy.adapters.base import UpstreamAdapter, UpstreamCredential
logger = logging.getLogger(__name__)
@ -136,50 +136,93 @@ def create_app(adapter: UpstreamAdapter) -> "web.Application":
logger.warning("proxy: credential resolution failed: %s", exc)
return _json_error(401, str(exc), code="upstream_auth_failed")
upstream_url = f"{cred.base_url.rstrip('/')}{rel_path}"
# Preserve query string verbatim.
if request.query_string:
upstream_url = f"{upstream_url}?{request.query_string}"
# Forward body verbatim. Read into memory once — request bodies for
# chat/completions/embeddings are small (<1MB typically). If we ever
# need to forward large multipart uploads we'll switch to streaming
# the request body too.
body = await request.read()
fwd_headers = _filter_request_headers(request.headers)
fwd_headers["Authorization"] = f"{cred.token_type} {cred.bearer}"
logger.debug(
"proxy: forwarding %s %s -> %s (body=%d bytes)",
request.method, rel_path, upstream_url, len(body),
)
# Use a per-request session so connection state doesn't leak between
# clients. Could be optimized to a shared session later.
timeout = aiohttp.ClientTimeout(total=None, sock_connect=15, sock_read=300)
try:
session = aiohttp.ClientSession(timeout=timeout)
except Exception as exc: # pragma: no cover - aiohttp setup issue
return _json_error(500, f"proxy session init failed: {exc}")
try:
upstream_resp = await session.request(
request.method,
upstream_url,
data=body if body else None,
headers=fwd_headers,
allow_redirects=False,
async def _send_upstream(active_cred: UpstreamCredential):
upstream_url = f"{active_cred.base_url.rstrip('/')}{rel_path}"
# Preserve query string verbatim.
if request.query_string:
upstream_url = f"{upstream_url}?{request.query_string}"
fwd_headers = _filter_request_headers(request.headers)
fwd_headers["Authorization"] = f"{active_cred.token_type} {active_cred.bearer}"
logger.debug(
"proxy: forwarding %s %s -> %s (body=%d bytes)",
request.method, rel_path, upstream_url, len(body),
)
except aiohttp.ClientError as exc:
await session.close()
logger.warning("proxy: upstream connection failed: %s", exc)
return _json_error(502, f"upstream connection failed: {exc}",
code="upstream_unreachable")
except asyncio.TimeoutError:
await session.close()
return _json_error(504, "upstream request timed out",
code="upstream_timeout")
try:
session = aiohttp.ClientSession(timeout=timeout)
except Exception as exc: # pragma: no cover - aiohttp setup issue
raise RuntimeError(f"proxy session init failed: {exc}") from exc
try:
upstream_resp = await session.request(
request.method,
upstream_url,
data=body if body else None,
headers=fwd_headers,
allow_redirects=False,
)
except Exception:
await session.close()
raise
return session, upstream_resp
async def _open_upstream(active_cred: UpstreamCredential):
try:
return await _send_upstream(active_cred)
except RuntimeError as exc:
return _json_error(500, str(exc)), None
except aiohttp.ClientError as exc:
logger.warning("proxy: upstream connection failed: %s", exc)
return (
_json_error(
502,
f"upstream connection failed: {exc}",
code="upstream_unreachable",
),
None,
)
except asyncio.TimeoutError:
return (
_json_error(
504,
"upstream request timed out",
code="upstream_timeout",
),
None,
)
session_or_response, upstream_resp = await _open_upstream(cred)
if upstream_resp is None:
return session_or_response
session = session_or_response
if upstream_resp.status == 401:
try:
retry_cred = adapter.get_retry_credential(
failed_credential=cred,
status_code=upstream_resp.status,
)
except Exception as exc:
logger.warning("proxy: retry credential resolution failed: %s", exc)
retry_cred = None
if retry_cred is not None:
upstream_resp.release()
await session.close()
session_or_response, upstream_resp = await _open_upstream(retry_cred)
if upstream_resp is None:
return session_or_response
session = session_or_response
# Stream response back. Headers first, then chunked body.
resp = web.StreamResponse(