mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-06-09 08:21:50 +00:00
Merge origin/main into pr-27248 (resolving run_agent.py = ours)
run_agent.py taken from HEAD (the extracted forwarder structure). The 25 run_agent.py fixes that landed on main during the PR's life need to be ported into the agent/* extracted modules in follow-up commits.
This commit is contained in:
commit
152d42d1a7
355 changed files with 32716 additions and 4195 deletions
|
|
@ -1060,10 +1060,12 @@ def _generate_pkce() -> tuple:
|
|||
|
||||
def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
|
||||
"""Run Hermes-native OAuth PKCE flow and return credential state."""
|
||||
import secrets
|
||||
import time
|
||||
import webbrowser
|
||||
|
||||
verifier, challenge = _generate_pkce()
|
||||
oauth_state = secrets.token_urlsafe(32)
|
||||
|
||||
params = {
|
||||
"code": "true",
|
||||
|
|
@ -1073,7 +1075,7 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
|
|||
"scope": _OAUTH_SCOPES,
|
||||
"code_challenge": challenge,
|
||||
"code_challenge_method": "S256",
|
||||
"state": verifier,
|
||||
"state": oauth_state,
|
||||
}
|
||||
from urllib.parse import urlencode
|
||||
|
||||
|
|
@ -1110,7 +1112,12 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
|
|||
|
||||
splits = auth_code.split("#")
|
||||
code = splits[0]
|
||||
state = splits[1] if len(splits) > 1 else ""
|
||||
received_state = splits[1] if len(splits) > 1 else ""
|
||||
|
||||
# Validate state to prevent CSRF (RFC 6749 §10.12)
|
||||
if received_state != oauth_state:
|
||||
logger.warning("OAuth state mismatch — possible CSRF, aborting")
|
||||
return None
|
||||
|
||||
try:
|
||||
import urllib.request
|
||||
|
|
@ -1119,7 +1126,7 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
|
|||
"grant_type": "authorization_code",
|
||||
"client_id": _OAUTH_CLIENT_ID,
|
||||
"code": code,
|
||||
"state": state,
|
||||
"state": received_state,
|
||||
"redirect_uri": _OAUTH_REDIRECT_URI,
|
||||
"code_verifier": verifier,
|
||||
}).encode()
|
||||
|
|
|
|||
68
agent/async_utils.py
Normal file
68
agent/async_utils.py
Normal file
|
|
@ -0,0 +1,68 @@
|
|||
"""Async/sync bridging helpers.
|
||||
|
||||
The codebase has ~30 sites that schedule a coroutine onto an event loop from a
|
||||
worker thread via :func:`asyncio.run_coroutine_threadsafe`. That function can
|
||||
raise :class:`RuntimeError` (e.g. the loop was closed during a shutdown race),
|
||||
and when it does the coroutine object is never awaited and never closed —
|
||||
which triggers a ``"coroutine '<name>' was never awaited"`` RuntimeWarning and
|
||||
leaks the coroutine's frame until GC.
|
||||
|
||||
:func:`safe_schedule_threadsafe` wraps the call, closes the coroutine on
|
||||
scheduling failure, and returns ``None`` (instead of a half-formed future) so
|
||||
callers can branch cleanly:
|
||||
|
||||
fut = safe_schedule_threadsafe(coro, loop)
|
||||
if fut is None:
|
||||
return # or fallback behavior
|
||||
fut.result(timeout=5)
|
||||
|
||||
The helper deliberately does NOT also handle ``future.result()`` failures —
|
||||
that is a separate concern. Once the loop has accepted the coroutine, its
|
||||
lifecycle belongs to the loop, not the scheduling thread.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import logging
|
||||
from concurrent.futures import Future
|
||||
from typing import Any, Coroutine, Optional
|
||||
|
||||
|
||||
_DEFAULT_LOGGER = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def safe_schedule_threadsafe(
|
||||
coro: Coroutine[Any, Any, Any],
|
||||
loop: Optional[asyncio.AbstractEventLoop],
|
||||
*,
|
||||
logger: Optional[logging.Logger] = None,
|
||||
log_message: str = "Failed to schedule coroutine on loop",
|
||||
log_level: int = logging.DEBUG,
|
||||
) -> Optional[Future]:
|
||||
"""Schedule ``coro`` on ``loop`` from a sync context, leak-safe.
|
||||
|
||||
Returns the :class:`concurrent.futures.Future` on success, or ``None`` if
|
||||
the loop is missing or :func:`asyncio.run_coroutine_threadsafe` raised
|
||||
(e.g. the loop was closed during a shutdown race). In all failure paths
|
||||
the coroutine is :meth:`close`-d so it does not trigger
|
||||
``"coroutine was never awaited"`` warnings or leak its frame.
|
||||
|
||||
Callers retain full control over what to do with the returned future
|
||||
(call ``.result(timeout=...)``, attach ``add_done_callback``, ignore it
|
||||
fire-and-forget, etc.).
|
||||
"""
|
||||
log = logger if logger is not None else _DEFAULT_LOGGER
|
||||
|
||||
if loop is None:
|
||||
if asyncio.iscoroutine(coro):
|
||||
coro.close()
|
||||
log.log(log_level, "%s: loop is None", log_message)
|
||||
return None
|
||||
|
||||
try:
|
||||
return asyncio.run_coroutine_threadsafe(coro, loop)
|
||||
except Exception as exc:
|
||||
if asyncio.iscoroutine(coro):
|
||||
coro.close()
|
||||
log.log(log_level, "%s: %s", log_message, exc)
|
||||
return None
|
||||
|
|
@ -369,6 +369,21 @@ def build_or_headers(or_config: dict | None = None) -> dict:
|
|||
|
||||
return headers
|
||||
|
||||
|
||||
# NVIDIA NIM cloud billing attribution. Keep this host-gated because the
|
||||
# nvidia provider also supports local/on-prem NIM endpoints via NVIDIA_BASE_URL.
|
||||
_NVIDIA_NIM_CLOUD_HEADERS = {
|
||||
"X-BILLING-INVOKE-ORIGIN": "HermesAgent",
|
||||
}
|
||||
|
||||
|
||||
def build_nvidia_nim_headers(base_url: str | None) -> dict:
|
||||
"""Return NVIDIA NIM cloud attribution headers for build.nvidia.com traffic."""
|
||||
if base_url_host_matches(str(base_url or ""), "integrate.api.nvidia.com"):
|
||||
return dict(_NVIDIA_NIM_CLOUD_HEADERS)
|
||||
return {}
|
||||
|
||||
|
||||
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
|
||||
# referrerUrl and X-Title maps to appName in the gateway's analytics.
|
||||
from hermes_cli import __version__ as _HERMES_VERSION
|
||||
|
|
@ -409,7 +424,7 @@ NOUS_EXTRA_BODY = _nous_extra_body()
|
|||
auxiliary_is_nous: bool = False
|
||||
|
||||
# Default auxiliary models per provider
|
||||
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
|
||||
_OPENROUTER_MODEL = "google/gemini-2.5-flash"
|
||||
_NOUS_MODEL = "google/gemini-3-flash-preview"
|
||||
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
|
||||
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
|
||||
|
|
@ -1254,6 +1269,58 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
|
|||
return api_key, base_url
|
||||
|
||||
|
||||
def _resolve_xai_oauth_for_aux() -> Optional[Tuple[str, str]]:
|
||||
"""Resolve a fresh xAI OAuth (api_key, base_url) for auxiliary clients.
|
||||
|
||||
Prefer the credential pool, matching the main runtime/provider status
|
||||
path. Some xAI OAuth logins live only as pool entries; falling straight
|
||||
to the singleton auth-store resolver would make auxiliary tasks such as
|
||||
compression report "no provider configured" even though ``hermes auth
|
||||
status`` shows xAI OAuth as logged in.
|
||||
|
||||
Falls back to ``hermes_cli.auth``'s singleton runtime resolver for older
|
||||
auth-store-only logins. Returns ``None`` if the user is not authenticated
|
||||
with xAI Grok OAuth.
|
||||
"""
|
||||
try:
|
||||
from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
|
||||
|
||||
pool = load_pool("xai-oauth")
|
||||
if pool and pool.has_credentials():
|
||||
entry = pool.select()
|
||||
if entry is not None:
|
||||
api_key = str(
|
||||
getattr(entry, "runtime_api_key", None)
|
||||
or getattr(entry, "access_token", "")
|
||||
or ""
|
||||
).strip()
|
||||
base_url = str(
|
||||
os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/")
|
||||
or os.getenv("XAI_BASE_URL", "").strip().rstrip("/")
|
||||
or getattr(entry, "runtime_base_url", None)
|
||||
or getattr(entry, "base_url", None)
|
||||
or DEFAULT_XAI_OAUTH_BASE_URL
|
||||
).strip().rstrip("/")
|
||||
if api_key and base_url:
|
||||
return api_key, base_url
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary xAI OAuth pool credential resolution failed: %s", exc)
|
||||
|
||||
try:
|
||||
from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
|
||||
|
||||
creds = resolve_xai_oauth_runtime_credentials()
|
||||
except Exception as exc:
|
||||
logger.debug("Auxiliary xAI OAuth runtime credential resolution failed: %s", exc)
|
||||
return None
|
||||
|
||||
api_key = str(creds.get("api_key") or "").strip()
|
||||
base_url = str(creds.get("base_url") or "").strip().rstrip("/")
|
||||
if not api_key or not base_url:
|
||||
return None
|
||||
return api_key, base_url
|
||||
|
||||
|
||||
def _read_codex_access_token() -> Optional[str]:
|
||||
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
|
||||
|
||||
|
|
@ -1348,6 +1415,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
|
||||
extra["default_headers"] = build_nvidia_nim_headers(base_url)
|
||||
else:
|
||||
try:
|
||||
from providers import get_provider_profile as _gpf_aux
|
||||
|
|
@ -1383,6 +1452,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
from hermes_cli.models import copilot_default_headers
|
||||
|
||||
extra["default_headers"] = copilot_default_headers()
|
||||
elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
|
||||
extra["default_headers"] = build_nvidia_nim_headers(base_url)
|
||||
else:
|
||||
try:
|
||||
from providers import get_provider_profile as _gpf_aux2
|
||||
|
|
@ -1402,7 +1473,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
|
|||
|
||||
|
||||
|
||||
def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
|
||||
pool_present, entry = _select_pool_entry("openrouter")
|
||||
if pool_present:
|
||||
or_key = explicit_api_key or _pool_runtime_api_key(entry)
|
||||
|
|
@ -1412,7 +1483,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
|
|||
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
|
||||
logger.debug("Auxiliary client: OpenRouter via pool")
|
||||
return OpenAI(api_key=or_key, base_url=base_url,
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
|
||||
|
||||
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
|
||||
if not or_key:
|
||||
|
|
@ -1420,7 +1491,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
|
|||
return None, None
|
||||
logger.debug("Auxiliary client: OpenRouter")
|
||||
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
|
||||
default_headers=build_or_headers()), _OPENROUTER_MODEL
|
||||
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
|
||||
|
||||
|
||||
def _describe_openrouter_unavailable() -> str:
|
||||
|
|
@ -1744,6 +1815,32 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
|
|||
return _fallback_client, model
|
||||
|
||||
|
||||
def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Build a CodexAuxiliaryClient for an xAI Grok OAuth-authenticated session.
|
||||
|
||||
xAI's ``/v1/responses`` endpoint speaks the OpenAI Responses API, so we
|
||||
wrap a plain ``OpenAI`` client in ``CodexAuxiliaryClient`` to translate
|
||||
``chat.completions.create()`` calls into ``responses.stream()`` requests.
|
||||
|
||||
The caller must pass an explicit model — pinning a default for Grok
|
||||
would silently rot when xAI's allowlist drifts. Returns ``(None, None)``
|
||||
when the user has not authenticated with xAI Grok OAuth.
|
||||
"""
|
||||
if not model:
|
||||
logger.warning(
|
||||
"Auxiliary client: xai-oauth requested without a model; "
|
||||
"pass model explicitly (auxiliary.<task>.model in config.yaml)."
|
||||
)
|
||||
return None, None
|
||||
resolved = _resolve_xai_oauth_for_aux()
|
||||
if resolved is None:
|
||||
return None, None
|
||||
api_key, base_url = resolved
|
||||
logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
|
||||
real_client = OpenAI(api_key=api_key, base_url=base_url)
|
||||
return CodexAuxiliaryClient(real_client, model), model
|
||||
|
||||
|
||||
def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
|
||||
"""Build a CodexAuxiliaryClient for an explicitly-requested model.
|
||||
|
||||
|
|
@ -2640,6 +2737,8 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
|
|||
)
|
||||
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
|
||||
async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
|
||||
elif base_url_host_matches(sync_base_url, "integrate.api.nvidia.com"):
|
||||
async_kwargs["default_headers"] = build_nvidia_nim_headers(sync_base_url)
|
||||
else:
|
||||
# Fall back to profile.default_headers for providers that declare
|
||||
# client-level headers on their ProviderProfile (e.g. attribution
|
||||
|
|
@ -2851,6 +2950,26 @@ def resolve_provider_client(
|
|||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── xAI Grok OAuth (loopback PKCE → Responses API) ───────────────
|
||||
# Without this branch, an xai-oauth main provider falls through to the
|
||||
# generic ``oauth_external`` arm below and returns ``(None, None)``,
|
||||
# silently re-routing every auxiliary task (compression, web extract,
|
||||
# session search, curator, etc.) to whatever Step-2 fallback the user
|
||||
# has configured. Users on xAI Grok OAuth would then see surprise
|
||||
# OpenRouter / Nous bills for side tasks they thought were running on
|
||||
# their xAI subscription.
|
||||
if provider == "xai-oauth":
|
||||
client, default = _build_xai_oauth_aux_client(model)
|
||||
if client is None:
|
||||
logger.warning(
|
||||
"resolve_provider_client: xai-oauth requested but no xAI "
|
||||
"OAuth token found (run: hermes model -> xAI Grok OAuth — SuperGrok Subscription)"
|
||||
)
|
||||
return None, None
|
||||
final_model = _normalize_resolved_model(model or default, provider)
|
||||
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
|
||||
else (client, final_model))
|
||||
|
||||
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
|
||||
if provider == "custom":
|
||||
if explicit_base_url:
|
||||
|
|
@ -2881,6 +3000,8 @@ def resolve_provider_client(
|
|||
extra["default_headers"] = copilot_request_headers(
|
||||
is_agent_turn=True, is_vision=is_vision
|
||||
)
|
||||
elif base_url_host_matches(custom_base, "integrate.api.nvidia.com"):
|
||||
extra["default_headers"] = build_nvidia_nim_headers(custom_base)
|
||||
else:
|
||||
# Fall back to profile.default_headers for providers that
|
||||
# declare client-level attribution headers on their profile.
|
||||
|
|
@ -2928,10 +3049,17 @@ def resolve_provider_client(
|
|||
if custom_entry:
|
||||
custom_base = custom_entry.get("base_url", "").strip()
|
||||
custom_key = custom_entry.get("api_key", "").strip()
|
||||
custom_key_env = custom_entry.get("key_env", "").strip()
|
||||
custom_key_env = (custom_entry.get("key_env") or custom_entry.get("api_key_env") or "").strip()
|
||||
if not custom_key and custom_key_env:
|
||||
custom_key = os.getenv(custom_key_env, "").strip()
|
||||
custom_key = custom_key or "no-key-required"
|
||||
if custom_key == "no-key-required":
|
||||
logger.warning(
|
||||
"resolve_provider_client: named custom provider %r has no resolvable "
|
||||
"api_key — request will be sent with placeholder no-key-required "
|
||||
"and will 401 on auth-required endpoints",
|
||||
custom_entry.get("name") or provider,
|
||||
)
|
||||
# An explicit per-task api_mode override (from _resolve_task_provider_model)
|
||||
# wins; otherwise fall back to what the provider entry declared.
|
||||
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
|
||||
|
|
@ -3079,6 +3207,8 @@ def resolve_provider_client(
|
|||
headers.update(copilot_request_headers(
|
||||
is_agent_turn=True, is_vision=is_vision
|
||||
))
|
||||
elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
|
||||
headers.update(build_nvidia_nim_headers(base_url))
|
||||
else:
|
||||
# Fall back to profile.default_headers for providers that declare
|
||||
# client-level attribution headers on their profile (e.g. GMI
|
||||
|
|
@ -3201,6 +3331,8 @@ def resolve_provider_client(
|
|||
return resolve_provider_client("nous", model, async_mode)
|
||||
if provider == "openai-codex":
|
||||
return resolve_provider_client("openai-codex", model, async_mode)
|
||||
if provider == "xai-oauth":
|
||||
return resolve_provider_client("xai-oauth", model, async_mode)
|
||||
# Other OAuth providers not directly supported
|
||||
logger.warning("resolve_provider_client: OAuth provider %s not "
|
||||
"directly supported, try 'auto'", provider)
|
||||
|
|
@ -3275,7 +3407,7 @@ def _resolve_strict_vision_backend(
|
|||
if provider == "copilot":
|
||||
return resolve_provider_client("copilot", model, is_vision=True)
|
||||
if provider == "openrouter":
|
||||
return _try_openrouter()
|
||||
return _try_openrouter(model=model)
|
||||
if provider == "nous":
|
||||
return _try_nous(vision=True)
|
||||
if provider == "openai-codex":
|
||||
|
|
|
|||
|
|
@ -244,8 +244,21 @@ def _normalize_responses_message_status(value: Any, *, default: str = "completed
|
|||
return default
|
||||
|
||||
|
||||
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items."""
|
||||
def _chat_messages_to_responses_input(
|
||||
messages: List[Dict[str, Any]],
|
||||
*,
|
||||
is_xai_responses: bool = False,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Convert internal chat-style messages to Responses input items.
|
||||
|
||||
``is_xai_responses=True`` strips ``encrypted_content`` from replayed
|
||||
reasoning items. xAI's OAuth/SuperGrok ``/v1/responses`` surface
|
||||
rejects encrypted reasoning blobs minted by prior turns: the request
|
||||
streams an ``error`` SSE frame before ``response.created`` and the
|
||||
OpenAI SDK collapses it into a generic stream-ordering error. Native
|
||||
Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content
|
||||
— keep the default off.
|
||||
"""
|
||||
items: List[Dict[str, Any]] = []
|
||||
seen_item_ids: set = set()
|
||||
|
||||
|
|
@ -271,9 +284,17 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
|
|||
if role == "assistant":
|
||||
# Replay encrypted reasoning items from previous turns
|
||||
# so the API can maintain coherent reasoning chains.
|
||||
#
|
||||
# xAI OAuth (SuperGrok/Premium) rejects replayed
|
||||
# ``encrypted_content`` reasoning items minted by prior
|
||||
# turns — see _chat_messages_to_responses_input docstring.
|
||||
# When ``is_xai_responses`` is set we drop the replay
|
||||
# entirely; Grok still reasons on each turn server-side,
|
||||
# we just don't try to thread the prior turn's encrypted
|
||||
# blob back in.
|
||||
codex_reasoning = msg.get("codex_reasoning_items")
|
||||
has_codex_reasoning = False
|
||||
if isinstance(codex_reasoning, list):
|
||||
if isinstance(codex_reasoning, list) and not is_xai_responses:
|
||||
for ri in codex_reasoning:
|
||||
if isinstance(ri, dict) and ri.get("encrypted_content"):
|
||||
item_id = ri.get("id")
|
||||
|
|
@ -726,7 +747,7 @@ def _preflight_codex_api_kwargs(
|
|||
"model", "instructions", "input", "tools", "store",
|
||||
"reasoning", "include", "max_output_tokens", "temperature",
|
||||
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
|
||||
"extra_headers",
|
||||
"extra_headers", "extra_body",
|
||||
}
|
||||
normalized: Dict[str, Any] = {
|
||||
"model": model,
|
||||
|
|
@ -776,6 +797,19 @@ def _preflight_codex_api_kwargs(
|
|||
if normalized_headers:
|
||||
normalized["extra_headers"] = normalized_headers
|
||||
|
||||
extra_body = api_kwargs.get("extra_body")
|
||||
if extra_body is not None:
|
||||
if not isinstance(extra_body, dict):
|
||||
raise ValueError("Codex Responses request 'extra_body' must be an object.")
|
||||
# Pass extra_body through verbatim — used by xAI Responses to
|
||||
# carry `prompt_cache_key` as a body-level field (the documented
|
||||
# cache-routing surface on /v1/responses). The openai SDK
|
||||
# serializes extra_body into the JSON body without per-field
|
||||
# type checks, so it survives Responses.stream() kwarg-signature
|
||||
# changes that would otherwise raise TypeError before the wire.
|
||||
if extra_body:
|
||||
normalized["extra_body"] = dict(extra_body)
|
||||
|
||||
if allow_stream:
|
||||
stream = api_kwargs.get("stream")
|
||||
if stream is not None and stream is not True:
|
||||
|
|
|
|||
|
|
@ -221,6 +221,114 @@ def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
|
|||
return json.dumps(shrunken, ensure_ascii=False)
|
||||
|
||||
|
||||
_IMAGE_PART_TYPES = frozenset({"image_url", "input_image", "image"})
|
||||
|
||||
|
||||
def _is_image_part(part: Any) -> bool:
|
||||
"""True if ``part`` is a multimodal image content block.
|
||||
|
||||
Recognizes all three shapes the agent handles:
|
||||
- OpenAI chat.completions: ``{"type": "image_url", "image_url": ...}``
|
||||
- OpenAI Responses API: ``{"type": "input_image", "image_url": "..."}``
|
||||
- Anthropic native: ``{"type": "image", "source": {...}}``
|
||||
"""
|
||||
if not isinstance(part, dict):
|
||||
return False
|
||||
return part.get("type") in _IMAGE_PART_TYPES
|
||||
|
||||
|
||||
def _content_has_images(content: Any) -> bool:
|
||||
"""True if a message's ``content`` is a multimodal list with image parts."""
|
||||
if not isinstance(content, list):
|
||||
return False
|
||||
return any(_is_image_part(p) for p in content)
|
||||
|
||||
|
||||
def _strip_images_from_content(content: Any) -> Any:
|
||||
"""Return a copy of ``content`` with every image part replaced by a
|
||||
short text placeholder.
|
||||
|
||||
- String content is returned unchanged.
|
||||
- Non-list, non-string content is returned unchanged.
|
||||
- List content: image parts become ``{"type": "text", "text": "[Attached
|
||||
image — stripped after compression]"}``; other parts are preserved as-is.
|
||||
|
||||
Input is never mutated.
|
||||
"""
|
||||
if not isinstance(content, list):
|
||||
return content
|
||||
if not any(_is_image_part(p) for p in content):
|
||||
return content
|
||||
|
||||
new_parts: List[Any] = []
|
||||
for p in content:
|
||||
if _is_image_part(p):
|
||||
new_parts.append({
|
||||
"type": "text",
|
||||
"text": "[Attached image — stripped after compression]",
|
||||
})
|
||||
else:
|
||||
new_parts.append(p)
|
||||
return new_parts
|
||||
|
||||
|
||||
def _strip_historical_media(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||||
"""Replace image parts in older messages with placeholder text.
|
||||
|
||||
The anchor is the *last* user message that has any image content. Every
|
||||
message before that anchor gets its image parts replaced with a short
|
||||
placeholder so the outgoing request stops re-shipping the same multi-MB
|
||||
base-64 image blobs on every turn.
|
||||
|
||||
If no user message carries images, the list is returned unchanged.
|
||||
If the only user message with images is the very first one (nothing
|
||||
earlier to strip), the list is returned unchanged.
|
||||
|
||||
Shallow copies of touched messages only; input is never mutated.
|
||||
Port of Kilo-Org/kilocode#9434 (adapted for the OpenAI-style message
|
||||
shape the hermes compressor emits).
|
||||
"""
|
||||
if not messages:
|
||||
return messages
|
||||
|
||||
# Find the newest user message that carries at least one image part.
|
||||
# We anchor on image-bearing user messages (not all user messages) so
|
||||
# a plain text follow-up after a big-image turn still strips the old
|
||||
# image — matching the problem kilocode#9434 set out to solve.
|
||||
anchor = -1
|
||||
for i in range(len(messages) - 1, -1, -1):
|
||||
msg = messages[i]
|
||||
if not isinstance(msg, dict):
|
||||
continue
|
||||
if msg.get("role") != "user":
|
||||
continue
|
||||
if _content_has_images(msg.get("content")):
|
||||
anchor = i
|
||||
break
|
||||
|
||||
if anchor <= 0:
|
||||
# No image-bearing user message, or it's the very first message —
|
||||
# nothing before it to strip.
|
||||
return messages
|
||||
|
||||
changed = False
|
||||
result: List[Dict[str, Any]] = []
|
||||
for i, msg in enumerate(messages):
|
||||
if i >= anchor or not isinstance(msg, dict):
|
||||
result.append(msg)
|
||||
continue
|
||||
content = msg.get("content")
|
||||
if not _content_has_images(content):
|
||||
result.append(msg)
|
||||
continue
|
||||
new_msg = msg.copy()
|
||||
new_msg["content"] = _strip_images_from_content(content)
|
||||
result.append(new_msg)
|
||||
changed = True
|
||||
|
||||
return result if changed else messages
|
||||
|
||||
|
||||
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
|
||||
"""Create an informative 1-line summary of a tool call + result.
|
||||
|
||||
|
|
@ -1559,6 +1667,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio
|
|||
|
||||
compressed = self._sanitize_tool_pairs(compressed)
|
||||
|
||||
# Replace image parts in all compressed messages before the newest
|
||||
# image-bearing user turn with a short text placeholder. Without
|
||||
# this, tail messages keep their original multi-MB base-64 image
|
||||
# payloads forever, which can push every subsequent API request
|
||||
# past the provider's body-size limit and wedge the session.
|
||||
# Port of Kilo-Org/kilocode#9434.
|
||||
compressed = _strip_historical_media(compressed)
|
||||
|
||||
new_estimate = estimate_messages_tokens_rough(compressed)
|
||||
saved_estimate = display_tokens - new_estimate
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,28 @@ _DEFAULT_TIMEOUT_SECONDS = 900.0
|
|||
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
|
||||
_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
|
||||
|
||||
# Stderr fingerprint of the deprecated `gh copilot` CLI extension
|
||||
# (https://github.blog/changelog/2025-09-25-upcoming-deprecation-of-gh-copilot-cli-extension).
|
||||
# We require BOTH the literal product name ("gh-copilot") AND a deprecation
|
||||
# marker, so generic stderr from the NEW `@github/copilot` CLI — whose repo
|
||||
# is github.com/github/copilot-cli and which legitimately mentions "copilot-cli"
|
||||
# in its own banners and error messages — doesn't get misclassified as the
|
||||
# deprecated extension.
|
||||
_DEPRECATION_REQUIRED = ("gh-copilot",)
|
||||
_DEPRECATION_MARKERS = (
|
||||
"has been deprecated",
|
||||
"no commands will be executed",
|
||||
)
|
||||
|
||||
|
||||
def _is_gh_copilot_deprecation_message(stderr_text: str) -> bool:
|
||||
"""True iff stderr looks like the deprecated gh-copilot extension's banner."""
|
||||
|
||||
lower = stderr_text.lower()
|
||||
if not any(req in lower for req in _DEPRECATION_REQUIRED):
|
||||
return False
|
||||
return any(marker in lower for marker in _DEPRECATION_MARKERS)
|
||||
|
||||
|
||||
def _resolve_command() -> str:
|
||||
return (
|
||||
|
|
@ -506,6 +528,21 @@ class CopilotACPClient:
|
|||
|
||||
stderr_text = "\n".join(stderr_tail).strip()
|
||||
if proc.poll() is not None and stderr_text:
|
||||
if _is_gh_copilot_deprecation_message(stderr_text):
|
||||
raise RuntimeError(
|
||||
"Hermes ACP mode requires the NEW GitHub Copilot CLI "
|
||||
"(github.com/github/copilot-cli), but the binary it just "
|
||||
"spawned is the deprecated `gh copilot` extension.\n\n"
|
||||
"Install the new CLI:\n"
|
||||
" npm install -g @github/copilot\n"
|
||||
" # then verify with: copilot --help\n\n"
|
||||
"If `copilot` already resolves to the new CLI but you still see this,\n"
|
||||
"point Hermes at it explicitly:\n"
|
||||
" export HERMES_COPILOT_ACP_COMMAND=/path/to/new/copilot\n\n"
|
||||
"Alternative: use the `copilot` provider (no ACP, hits the Copilot API\n"
|
||||
"directly with a Copilot subscription token) via `hermes setup`.\n\n"
|
||||
f"Original error:\n{stderr_text}"
|
||||
)
|
||||
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
|
||||
raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
|
||||
|
||||
|
|
|
|||
|
|
@ -29,6 +29,7 @@ from hermes_cli.auth import (
|
|||
_resolve_zai_base_url,
|
||||
_save_auth_store,
|
||||
_save_provider_state,
|
||||
_store_provider_state,
|
||||
read_credential_pool,
|
||||
write_credential_pool,
|
||||
)
|
||||
|
|
@ -128,6 +129,9 @@ class PooledCredential:
|
|||
def from_dict(cls, provider: str, payload: Dict[str, Any]) -> "PooledCredential":
|
||||
field_names = {f.name for f in fields(cls) if f.name != "provider"}
|
||||
data = {k: payload.get(k) for k in field_names if k in payload}
|
||||
# Rehydrated last_status_at may be an ISO string from to_dict() — normalize to float epoch
|
||||
if "last_status_at" in data and isinstance(data["last_status_at"], str):
|
||||
data["last_status_at"] = _parse_absolute_timestamp(data["last_status_at"])
|
||||
extra = {k: payload[k] for k in _EXTRA_KEYS if k in payload and payload[k] is not None}
|
||||
data["extra"] = extra
|
||||
data.setdefault("id", uuid.uuid4().hex[:6])
|
||||
|
|
@ -539,6 +543,64 @@ class CredentialPool:
|
|||
logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_xai_oauth_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync an xAI OAuth pool entry from auth.json if tokens differ.
|
||||
|
||||
xAI OAuth refresh tokens are single-use. When another Hermes process
|
||||
(or another profile sharing the same auth.json) refreshes the token,
|
||||
it writes the new pair to ``providers["xai-oauth"]["tokens"]`` under
|
||||
``_auth_store_lock``. Without this resync, our in-memory pool entry
|
||||
keeps the consumed refresh_token and the next ``_refresh_entry`` call
|
||||
would replay it and get a ``refresh_token_reused``-style 4xx.
|
||||
|
||||
Only applies to entries seeded from the singleton (``loopback_pkce``);
|
||||
manually added entries (``manual:xai_pkce``) are independent
|
||||
credentials with their own refresh-token lifecycle.
|
||||
"""
|
||||
if self.provider != "xai-oauth" or entry.source != "loopback_pkce":
|
||||
return entry
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
auth_store = _load_auth_store()
|
||||
state = _load_provider_state(auth_store, "xai-oauth")
|
||||
if not isinstance(state, dict):
|
||||
return entry
|
||||
tokens = state.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return entry
|
||||
store_access = tokens.get("access_token", "")
|
||||
store_refresh = tokens.get("refresh_token", "")
|
||||
entry_access = entry.access_token or ""
|
||||
entry_refresh = entry.refresh_token or ""
|
||||
if store_access and (
|
||||
store_access != entry_access
|
||||
or (store_refresh and store_refresh != entry_refresh)
|
||||
):
|
||||
logger.debug(
|
||||
"Pool entry %s: syncing xAI OAuth tokens from auth.json "
|
||||
"(refreshed by another process)",
|
||||
entry.id,
|
||||
)
|
||||
field_updates: Dict[str, Any] = {
|
||||
"access_token": store_access,
|
||||
"refresh_token": store_refresh or entry.refresh_token,
|
||||
"last_status": None,
|
||||
"last_status_at": None,
|
||||
"last_error_code": None,
|
||||
"last_error_reason": None,
|
||||
"last_error_message": None,
|
||||
"last_error_reset_at": None,
|
||||
}
|
||||
if state.get("last_refresh"):
|
||||
field_updates["last_refresh"] = state["last_refresh"]
|
||||
updated = replace(entry, **field_updates)
|
||||
self._replace_entry(entry, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
except Exception as exc:
|
||||
logger.debug("Failed to sync xAI OAuth entry from auth.json: %s", exc)
|
||||
return entry
|
||||
|
||||
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
|
||||
"""Sync a Nous pool entry from auth.json if tokens differ.
|
||||
|
||||
|
|
@ -604,9 +666,22 @@ class CredentialPool:
|
|||
re-seeding a consumed single-use refresh token.
|
||||
|
||||
Applies to any OAuth provider whose singleton lives in auth.json
|
||||
(currently Nous and OpenAI Codex).
|
||||
(currently Nous, OpenAI Codex, and xAI Grok OAuth).
|
||||
|
||||
``set_active=False`` on every write: a pool sync-back is a
|
||||
token-rotation side effect, not the user choosing a provider.
|
||||
Using ``_save_provider_state`` (which sets ``active_provider``)
|
||||
here would mean every Nous/Codex/xAI refresh in a multi-provider
|
||||
setup silently flips the ``active_provider`` flag — the next
|
||||
``hermes`` invocation that defaults to the active provider
|
||||
(e.g. setup wizard, ``hermes auth status``) would land on
|
||||
whatever provider happened to refresh last, not whatever the
|
||||
user actually chose.
|
||||
"""
|
||||
if entry.source != "device_code":
|
||||
# Only sync entries that were seeded *from* a singleton. Manually
|
||||
# added pool entries (source="manual:*") are independent credentials
|
||||
# and must not write back to the singleton.
|
||||
if entry.source not in {"device_code", "loopback_pkce"}:
|
||||
return
|
||||
try:
|
||||
with _auth_store_lock():
|
||||
|
|
@ -632,7 +707,7 @@ class CredentialPool:
|
|||
state[extra_key] = val
|
||||
if entry.inference_base_url:
|
||||
state["inference_base_url"] = entry.inference_base_url
|
||||
_save_provider_state(auth_store, "nous", state)
|
||||
_store_provider_state(auth_store, "nous", state, set_active=False)
|
||||
|
||||
elif self.provider == "openai-codex":
|
||||
state = _load_provider_state(auth_store, "openai-codex")
|
||||
|
|
@ -646,7 +721,21 @@ class CredentialPool:
|
|||
tokens["refresh_token"] = entry.refresh_token
|
||||
if entry.last_refresh:
|
||||
state["last_refresh"] = entry.last_refresh
|
||||
_save_provider_state(auth_store, "openai-codex", state)
|
||||
_store_provider_state(auth_store, "openai-codex", state, set_active=False)
|
||||
|
||||
elif self.provider == "xai-oauth":
|
||||
state = _load_provider_state(auth_store, "xai-oauth")
|
||||
if not isinstance(state, dict):
|
||||
return
|
||||
tokens = state.get("tokens")
|
||||
if not isinstance(tokens, dict):
|
||||
return
|
||||
tokens["access_token"] = entry.access_token
|
||||
if entry.refresh_token:
|
||||
tokens["refresh_token"] = entry.refresh_token
|
||||
if entry.last_refresh:
|
||||
state["last_refresh"] = entry.last_refresh
|
||||
_store_provider_state(auth_store, "xai-oauth", state, set_active=False)
|
||||
|
||||
else:
|
||||
return
|
||||
|
|
@ -699,6 +788,25 @@ class CredentialPool:
|
|||
refresh_token=refreshed["refresh_token"],
|
||||
last_refresh=refreshed.get("last_refresh"),
|
||||
)
|
||||
elif self.provider == "xai-oauth":
|
||||
# Adopt fresher tokens from auth.json before spending the
|
||||
# refresh_token — single-use tokens consumed by another
|
||||
# process (or another profile sharing the singleton) would
|
||||
# otherwise trigger ``refresh_token_reused`` on the next
|
||||
# POST. Only meaningful for singleton-seeded entries.
|
||||
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
refreshed = auth_mod.refresh_xai_oauth_pure(
|
||||
entry.access_token,
|
||||
entry.refresh_token,
|
||||
)
|
||||
updated = replace(
|
||||
entry,
|
||||
access_token=refreshed["access_token"],
|
||||
refresh_token=refreshed["refresh_token"],
|
||||
last_refresh=refreshed.get("last_refresh"),
|
||||
)
|
||||
elif self.provider == "nous":
|
||||
synced = self._sync_nous_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
|
|
@ -777,6 +885,30 @@ class CredentialPool:
|
|||
# Credentials file had a valid (non-expired) token — use it directly
|
||||
logger.debug("Credentials file has valid token, using without refresh")
|
||||
return synced
|
||||
# For xai-oauth: same race as nous — another process may have
|
||||
# consumed the refresh token between our proactive sync and the
|
||||
# HTTP call. Re-check auth.json and adopt the fresh tokens if
|
||||
# they have rotated since. Only meaningful for singleton-seeded
|
||||
# (loopback_pkce) entries; manual entries don't share state with
|
||||
# the singleton.
|
||||
if self.provider == "xai-oauth":
|
||||
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
|
||||
if synced.refresh_token != entry.refresh_token:
|
||||
logger.debug(
|
||||
"xAI OAuth refresh failed but auth.json has newer tokens — adopting"
|
||||
)
|
||||
updated = replace(
|
||||
synced,
|
||||
last_status=STATUS_OK,
|
||||
last_status_at=None,
|
||||
last_error_code=None,
|
||||
last_error_reason=None,
|
||||
last_error_message=None,
|
||||
last_error_reset_at=None,
|
||||
)
|
||||
self._replace_entry(synced, updated)
|
||||
self._persist()
|
||||
return updated
|
||||
# For nous: another process may have consumed the refresh token
|
||||
# between our proactive sync and the HTTP call. Re-sync from
|
||||
# auth.json and adopt the fresh tokens if available.
|
||||
|
|
@ -829,6 +961,11 @@ class CredentialPool:
|
|||
entry.access_token,
|
||||
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
)
|
||||
if self.provider == "xai-oauth":
|
||||
return auth_mod._xai_access_token_is_expiring(
|
||||
entry.access_token,
|
||||
auth_mod.XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
|
||||
)
|
||||
if self.provider == "nous":
|
||||
# Nous refresh/mint can require network access and should happen when
|
||||
# runtime credentials are actually resolved, not merely when the pool
|
||||
|
|
@ -883,6 +1020,17 @@ class CredentialPool:
|
|||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
# For xai-oauth singleton-seeded entries, identical pattern:
|
||||
# an entry frozen as exhausted may simply be holding stale
|
||||
# tokens that another process (or a fresh `hermes model` ->
|
||||
# xAI Grok OAuth login) has since rotated in auth.json.
|
||||
if (self.provider == "xai-oauth"
|
||||
and entry.source == "loopback_pkce"
|
||||
and entry.last_status == STATUS_EXHAUSTED):
|
||||
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
|
||||
if synced is not entry:
|
||||
entry = synced
|
||||
cleared_any = True
|
||||
if entry.last_status == STATUS_EXHAUSTED:
|
||||
exhausted_until = _exhausted_until(entry)
|
||||
if exhausted_until is not None and now < exhausted_until:
|
||||
|
|
@ -1394,6 +1542,37 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
|
|||
},
|
||||
)
|
||||
|
||||
elif provider == "xai-oauth":
|
||||
# When the user logs in via ``hermes model`` -> xAI Grok OAuth,
|
||||
# tokens are written to the auth.json singleton
|
||||
# (``providers["xai-oauth"]``). Surface them in the pool too so
|
||||
# ``hermes auth list`` reflects the logged-in state and so the pool
|
||||
# is the single source of truth for refresh during runtime resolution.
|
||||
if _is_suppressed(provider, "loopback_pkce"):
|
||||
return changed, active_sources
|
||||
|
||||
state = _load_provider_state(auth_store, "xai-oauth")
|
||||
tokens = state.get("tokens") if isinstance(state, dict) else None
|
||||
if isinstance(tokens, dict) and tokens.get("access_token"):
|
||||
active_sources.add("loopback_pkce")
|
||||
from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
|
||||
|
||||
base_url = DEFAULT_XAI_OAUTH_BASE_URL
|
||||
changed |= _upsert_entry(
|
||||
entries,
|
||||
provider,
|
||||
"loopback_pkce",
|
||||
{
|
||||
"source": "loopback_pkce",
|
||||
"auth_type": AUTH_TYPE_OAUTH,
|
||||
"access_token": tokens.get("access_token", ""),
|
||||
"refresh_token": tokens.get("refresh_token"),
|
||||
"base_url": base_url,
|
||||
"last_refresh": state.get("last_refresh"),
|
||||
"label": label_from_token(tokens.get("access_token", ""), "loopback_pkce"),
|
||||
},
|
||||
)
|
||||
|
||||
return changed, active_sources
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -265,6 +265,31 @@ def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
|
|||
return result
|
||||
|
||||
|
||||
def _remove_xai_oauth_loopback_pkce(provider: str, removed) -> RemovalResult:
|
||||
"""xAI OAuth tokens live in auth.json providers.xai-oauth — clear them.
|
||||
|
||||
Without this step, ``hermes auth remove xai-oauth <N>`` silently undoes
|
||||
itself: the central dispatcher only removes the in-memory pool entry,
|
||||
leaves ``providers.xai-oauth`` in auth.json intact, and on the next
|
||||
``load_pool("xai-oauth")`` call ``_seed_from_singletons`` re-seeds the
|
||||
entry from the still-present singleton — credentials reappear with no
|
||||
user feedback. Clearing the singleton in step with the suppression set
|
||||
by the central dispatcher makes the removal stick.
|
||||
|
||||
Belt-and-braces against the manual entry path: ``hermes auth add
|
||||
xai-oauth`` produces a ``manual:xai_pkce`` entry whose removal step
|
||||
falls through to "unregistered → nothing to clean up" (correct —
|
||||
manual entries are pool-only).
|
||||
"""
|
||||
result = RemovalResult()
|
||||
if _clear_auth_store_provider(provider):
|
||||
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
|
||||
result.hints.append(
|
||||
"Run `hermes model` → xAI Grok OAuth (SuperGrok Subscription) to re-authenticate if needed."
|
||||
)
|
||||
return result
|
||||
|
||||
|
||||
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
|
||||
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
|
||||
|
||||
|
|
@ -397,6 +422,11 @@ def _register_all_sources() -> None:
|
|||
remove_fn=_remove_codex_device_code,
|
||||
description="auth.json providers.openai-codex + ~/.codex/auth.json",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="xai-oauth", source_id="loopback_pkce",
|
||||
remove_fn=_remove_xai_oauth_loopback_pkce,
|
||||
description="auth.json providers.xai-oauth",
|
||||
))
|
||||
register(RemovalStep(
|
||||
provider="qwen-oauth", source_id="qwen-cli",
|
||||
remove_fn=_remove_qwen_cli,
|
||||
|
|
|
|||
|
|
@ -107,9 +107,14 @@ class _BackgroundLoop:
|
|||
|
||||
Returns the coroutine's result, or raises its exception.
|
||||
"""
|
||||
from agent.async_utils import safe_schedule_threadsafe
|
||||
if self._loop is None:
|
||||
if asyncio.iscoroutine(coro):
|
||||
coro.close()
|
||||
raise RuntimeError("background loop not started")
|
||||
fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, self._loop)
|
||||
fut = safe_schedule_threadsafe(coro, self._loop)
|
||||
if fut is None:
|
||||
raise RuntimeError("background loop not running")
|
||||
try:
|
||||
return fut.result(timeout=timeout)
|
||||
except Exception:
|
||||
|
|
|
|||
|
|
@ -213,6 +213,7 @@ DEFAULT_CONTEXT_LENGTHS = {
|
|||
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
|
||||
"grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning
|
||||
"grok-4.20": 2000000, # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
|
||||
"grok-4.3": 1000000, # grok-4.3, grok-4.3-latest — 1M context per docs.x.ai
|
||||
"grok-4": 256000, # grok-4, grok-4-0709
|
||||
"grok-3": 131072, # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
|
||||
"grok-2": 131072, # grok-2, grok-2-1212, grok-2-latest
|
||||
|
|
@ -357,6 +358,12 @@ _URL_TO_PROVIDER: Dict[str, str] = {
|
|||
"api.deepseek.com": "deepseek",
|
||||
"api.githubcopilot.com": "copilot",
|
||||
"models.github.ai": "copilot",
|
||||
# GitHub Models free tier (Azure-hosted prototyping endpoint) — same
|
||||
# canonical provider as the Copilot API. Hard per-request token cap
|
||||
# (often 8K) makes it unusable for Hermes' system prompt, but mapping
|
||||
# it here lets us recognize the endpoint and emit a targeted hint
|
||||
# instead of falling through the unknown-custom-endpoint path.
|
||||
"models.inference.ai.azure.com": "copilot",
|
||||
"api.fireworks.ai": "fireworks",
|
||||
"opencode.ai": "opencode-go",
|
||||
"api.x.ai": "xai",
|
||||
|
|
|
|||
|
|
@ -15,6 +15,18 @@ and MoonshotAI/kimi-cli#1595:
|
|||
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
|
||||
the parent. Presence of both causes "type should be defined in anyOf
|
||||
items instead of the parent schema".
|
||||
3. ``enum`` arrays on scalar-typed nodes may not contain ``null`` or empty
|
||||
strings. Strip those entries (drop the enum entirely if it becomes empty).
|
||||
4. ``$ref`` nodes may not carry sibling keywords. Moonshot expands the
|
||||
reference before validation and then rejects the node if sibling keys
|
||||
like ``description`` remain on the same node as ``$ref``. Strip every
|
||||
sibling from ``$ref`` nodes so only ``{"$ref": "..."}`` survives.
|
||||
(Ported from anomalyco/opencode#24730.)
|
||||
5. ``items`` may not be a tuple-style array (``items: [schemaA, schemaB]``
|
||||
for positional element schemas). Moonshot's schema engine requires a
|
||||
single object schema applied to every array element. Collapse tuple
|
||||
``items`` to the first element schema (or ``{}`` if the tuple is empty).
|
||||
(Ported from anomalyco/opencode#24730.)
|
||||
|
||||
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
|
||||
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
|
||||
|
|
@ -66,6 +78,16 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
|
|||
}
|
||||
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
|
||||
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
|
||||
elif key == "items" and isinstance(value, list):
|
||||
# Rule 5: tuple-style ``items`` arrays (positional element
|
||||
# schemas) are not accepted by Moonshot. Collapse to the
|
||||
# first element schema if present, else to ``{}``. This
|
||||
# matches opencode's behaviour for moonshotai / kimi models.
|
||||
first = value[0] if value else {}
|
||||
if isinstance(first, dict):
|
||||
repaired[key] = _repair_schema(first, is_schema=True)
|
||||
else:
|
||||
repaired[key] = first
|
||||
elif key in _SCHEMA_NODE_KEYS:
|
||||
# items / not / additionalProperties: single nested schema.
|
||||
# additionalProperties can also be a bool — leave those alone.
|
||||
|
|
@ -130,6 +152,15 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
|
|||
else:
|
||||
repaired.pop("enum")
|
||||
|
||||
# Rule 4: $ref nodes must not have sibling keywords. Moonshot expands
|
||||
# the reference before validation and then rejects the node if siblings
|
||||
# like ``description`` / ``type`` / ``default`` appear alongside $ref.
|
||||
# The referenced definition still carries its own description on the
|
||||
# target node, which Moonshot accepts.
|
||||
# (Ported from anomalyco/opencode#24730.)
|
||||
if "$ref" in repaired:
|
||||
return {"$ref": repaired["$ref"]}
|
||||
|
||||
return repaired
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -425,7 +425,7 @@ def build_skill_invocation_message(
|
|||
|
||||
loaded = _load_skill_payload(skill_info["skill_dir"], task_id=task_id)
|
||||
if not loaded:
|
||||
return f"[Failed to load skill: {skill_info['name']}]"
|
||||
return None
|
||||
|
||||
loaded_skill, skill_dir, skill_name = loaded
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,10 @@ class ResponsesApiTransport(ProviderTransport):
|
|||
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
|
||||
"""Convert OpenAI chat messages to Responses API input items."""
|
||||
from agent.codex_responses_adapter import _chat_messages_to_responses_input
|
||||
return _chat_messages_to_responses_input(messages)
|
||||
return _chat_messages_to_responses_input(
|
||||
messages,
|
||||
is_xai_responses=bool(kwargs.get("is_xai_responses")),
|
||||
)
|
||||
|
||||
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
|
||||
"""Convert OpenAI tool schemas to Responses API function definitions."""
|
||||
|
|
@ -89,24 +92,38 @@ class ResponsesApiTransport(ProviderTransport):
|
|||
_effort_clamp = {"minimal": "low"}
|
||||
reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
|
||||
|
||||
response_tools = _responses_tools(tools)
|
||||
kwargs = {
|
||||
"model": model,
|
||||
"instructions": instructions,
|
||||
"input": _chat_messages_to_responses_input(payload_messages),
|
||||
"tools": _responses_tools(tools),
|
||||
"tool_choice": "auto",
|
||||
"parallel_tool_calls": True,
|
||||
"input": _chat_messages_to_responses_input(
|
||||
payload_messages,
|
||||
is_xai_responses=is_xai_responses,
|
||||
),
|
||||
"tools": response_tools,
|
||||
"store": False,
|
||||
}
|
||||
if response_tools:
|
||||
kwargs["tool_choice"] = "auto"
|
||||
kwargs["parallel_tool_calls"] = True
|
||||
|
||||
session_id = params.get("session_id")
|
||||
if not is_github_responses and session_id:
|
||||
# xAI Responses takes prompt_cache_key in extra_body (set further
|
||||
# down); GitHub Models opts out of cache-key routing entirely.
|
||||
if not is_github_responses and not is_xai_responses and session_id:
|
||||
kwargs["prompt_cache_key"] = session_id
|
||||
|
||||
if reasoning_enabled and is_xai_responses:
|
||||
from agent.model_metadata import grok_supports_reasoning_effort
|
||||
|
||||
kwargs["include"] = ["reasoning.encrypted_content"]
|
||||
# NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
|
||||
# any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
|
||||
# replayed encrypted reasoning items on turn 2+ — see
|
||||
# _chat_messages_to_responses_input docstring. Requesting the field
|
||||
# back would just have us cache something we then must strip. Grok
|
||||
# still reasons natively each turn; coherence across turns rides on
|
||||
# the visible message text alone.
|
||||
kwargs["include"] = []
|
||||
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
|
||||
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
|
||||
# those models reason natively. Only send the effort dial when
|
||||
|
|
@ -165,6 +182,17 @@ class ResponsesApiTransport(ProviderTransport):
|
|||
merged_extra_headers["x-grok-conv-id"] = session_id
|
||||
kwargs["extra_headers"] = merged_extra_headers
|
||||
|
||||
# xAI Responses cache-routing — body-level field per
|
||||
# https://docs.x.ai/developers/advanced-api-usage/prompt-caching/maximizing-cache-hits.
|
||||
# Sent via extra_body (not the typed kwarg) so it survives openai
|
||||
# SDK builds whose Responses.stream() signature has dropped the field.
|
||||
existing_extra_body = kwargs.get("extra_body")
|
||||
merged_extra_body: Dict[str, Any] = {}
|
||||
if isinstance(existing_extra_body, dict):
|
||||
merged_extra_body.update(existing_extra_body)
|
||||
merged_extra_body.setdefault("prompt_cache_key", session_id)
|
||||
kwargs["extra_body"] = merged_extra_body
|
||||
|
||||
return kwargs
|
||||
|
||||
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
|
||||
|
|
|
|||
|
|
@ -14,20 +14,28 @@ the user gets full Hermes capability inside a Codex turn.
|
|||
Scope (what we expose):
|
||||
- web_search, web_extract — Firecrawl, no codex equivalent
|
||||
- browser_navigate / _click / _type / — Camofox/Browserbase automation
|
||||
_snapshot / _screenshot / _scroll / _back / _press / _vision
|
||||
- delegate_task — Hermes subagents
|
||||
_snapshot / _scroll / _back / _press /
|
||||
_get_images / _console / _vision
|
||||
- vision_analyze — image inspection by vision model
|
||||
- image_generate — image generation
|
||||
- memory — Hermes' persistent memory store
|
||||
- skill_view, skills_list — Hermes' skill library
|
||||
- session_search — cross-session search
|
||||
- text_to_speech — TTS
|
||||
- kanban_* (complete/block/comment/ — kanban worker + orchestrator
|
||||
heartbeat/show/list/create/ handoff (stateless: read env var,
|
||||
unblock/link) write ~/.hermes/kanban.db)
|
||||
|
||||
What we DO NOT expose (codex has equivalents):
|
||||
What we DO NOT expose:
|
||||
- terminal / shell — codex's own shell tool
|
||||
- read_file / write_file / patch — codex's apply_patch + shell
|
||||
- search_files / process — codex's shell
|
||||
- clarify, todo — codex's own UX
|
||||
- clarify — codex's own UX
|
||||
- delegate_task / memory / — `_AGENT_LOOP_TOOLS` in Hermes
|
||||
session_search / todo (model_tools.py). They require
|
||||
the running AIAgent context to
|
||||
dispatch (mid-loop state), so a
|
||||
stateless MCP callback can't
|
||||
drive them. See the inline
|
||||
comment on EXPOSED_TOOLS below.
|
||||
|
||||
Run with: python -m agent.transports.hermes_tools_mcp_server
|
||||
Spawned by: CodexAppServerSession.ensure_started() when the runtime is
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue