Merge origin/main into pr-27248 (resolving run_agent.py = ours)

run_agent.py taken from HEAD (the extracted forwarder structure). The 25
run_agent.py fixes that landed on main during the PR's life need to be
ported into the agent/* extracted modules in follow-up commits.
This commit is contained in:
teknium1 2026-05-16 23:16:52 -07:00
commit 152d42d1a7
No known key found for this signature in database
355 changed files with 32716 additions and 4195 deletions

View file

@ -1060,10 +1060,12 @@ def _generate_pkce() -> tuple:
def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
"""Run Hermes-native OAuth PKCE flow and return credential state."""
import secrets
import time
import webbrowser
verifier, challenge = _generate_pkce()
oauth_state = secrets.token_urlsafe(32)
params = {
"code": "true",
@ -1073,7 +1075,7 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
"scope": _OAUTH_SCOPES,
"code_challenge": challenge,
"code_challenge_method": "S256",
"state": verifier,
"state": oauth_state,
}
from urllib.parse import urlencode
@ -1110,7 +1112,12 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
splits = auth_code.split("#")
code = splits[0]
state = splits[1] if len(splits) > 1 else ""
received_state = splits[1] if len(splits) > 1 else ""
# Validate state to prevent CSRF (RFC 6749 §10.12)
if received_state != oauth_state:
logger.warning("OAuth state mismatch — possible CSRF, aborting")
return None
try:
import urllib.request
@ -1119,7 +1126,7 @@ def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
"grant_type": "authorization_code",
"client_id": _OAUTH_CLIENT_ID,
"code": code,
"state": state,
"state": received_state,
"redirect_uri": _OAUTH_REDIRECT_URI,
"code_verifier": verifier,
}).encode()

68
agent/async_utils.py Normal file
View file

@ -0,0 +1,68 @@
"""Async/sync bridging helpers.
The codebase has ~30 sites that schedule a coroutine onto an event loop from a
worker thread via :func:`asyncio.run_coroutine_threadsafe`. That function can
raise :class:`RuntimeError` (e.g. the loop was closed during a shutdown race),
and when it does the coroutine object is never awaited and never closed
which triggers a ``"coroutine '<name>' was never awaited"`` RuntimeWarning and
leaks the coroutine's frame until GC.
:func:`safe_schedule_threadsafe` wraps the call, closes the coroutine on
scheduling failure, and returns ``None`` (instead of a half-formed future) so
callers can branch cleanly:
fut = safe_schedule_threadsafe(coro, loop)
if fut is None:
return # or fallback behavior
fut.result(timeout=5)
The helper deliberately does NOT also handle ``future.result()`` failures
that is a separate concern. Once the loop has accepted the coroutine, its
lifecycle belongs to the loop, not the scheduling thread.
"""
from __future__ import annotations
import asyncio
import logging
from concurrent.futures import Future
from typing import Any, Coroutine, Optional
_DEFAULT_LOGGER = logging.getLogger(__name__)
def safe_schedule_threadsafe(
coro: Coroutine[Any, Any, Any],
loop: Optional[asyncio.AbstractEventLoop],
*,
logger: Optional[logging.Logger] = None,
log_message: str = "Failed to schedule coroutine on loop",
log_level: int = logging.DEBUG,
) -> Optional[Future]:
"""Schedule ``coro`` on ``loop`` from a sync context, leak-safe.
Returns the :class:`concurrent.futures.Future` on success, or ``None`` if
the loop is missing or :func:`asyncio.run_coroutine_threadsafe` raised
(e.g. the loop was closed during a shutdown race). In all failure paths
the coroutine is :meth:`close`-d so it does not trigger
``"coroutine was never awaited"`` warnings or leak its frame.
Callers retain full control over what to do with the returned future
(call ``.result(timeout=...)``, attach ``add_done_callback``, ignore it
fire-and-forget, etc.).
"""
log = logger if logger is not None else _DEFAULT_LOGGER
if loop is None:
if asyncio.iscoroutine(coro):
coro.close()
log.log(log_level, "%s: loop is None", log_message)
return None
try:
return asyncio.run_coroutine_threadsafe(coro, loop)
except Exception as exc:
if asyncio.iscoroutine(coro):
coro.close()
log.log(log_level, "%s: %s", log_message, exc)
return None

View file

@ -369,6 +369,21 @@ def build_or_headers(or_config: dict | None = None) -> dict:
return headers
# NVIDIA NIM cloud billing attribution. Keep this host-gated because the
# nvidia provider also supports local/on-prem NIM endpoints via NVIDIA_BASE_URL.
_NVIDIA_NIM_CLOUD_HEADERS = {
"X-BILLING-INVOKE-ORIGIN": "HermesAgent",
}
def build_nvidia_nim_headers(base_url: str | None) -> dict:
"""Return NVIDIA NIM cloud attribution headers for build.nvidia.com traffic."""
if base_url_host_matches(str(base_url or ""), "integrate.api.nvidia.com"):
return dict(_NVIDIA_NIM_CLOUD_HEADERS)
return {}
# Vercel AI Gateway app attribution headers. HTTP-Referer maps to
# referrerUrl and X-Title maps to appName in the gateway's analytics.
from hermes_cli import __version__ as _HERMES_VERSION
@ -409,7 +424,7 @@ NOUS_EXTRA_BODY = _nous_extra_body()
auxiliary_is_nous: bool = False
# Default auxiliary models per provider
_OPENROUTER_MODEL = "google/gemini-3-flash-preview"
_OPENROUTER_MODEL = "google/gemini-2.5-flash"
_NOUS_MODEL = "google/gemini-3-flash-preview"
_NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1"
_ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com"
@ -1254,6 +1269,58 @@ def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[
return api_key, base_url
def _resolve_xai_oauth_for_aux() -> Optional[Tuple[str, str]]:
"""Resolve a fresh xAI OAuth (api_key, base_url) for auxiliary clients.
Prefer the credential pool, matching the main runtime/provider status
path. Some xAI OAuth logins live only as pool entries; falling straight
to the singleton auth-store resolver would make auxiliary tasks such as
compression report "no provider configured" even though ``hermes auth
status`` shows xAI OAuth as logged in.
Falls back to ``hermes_cli.auth``'s singleton runtime resolver for older
auth-store-only logins. Returns ``None`` if the user is not authenticated
with xAI Grok OAuth.
"""
try:
from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
pool = load_pool("xai-oauth")
if pool and pool.has_credentials():
entry = pool.select()
if entry is not None:
api_key = str(
getattr(entry, "runtime_api_key", None)
or getattr(entry, "access_token", "")
or ""
).strip()
base_url = str(
os.getenv("HERMES_XAI_BASE_URL", "").strip().rstrip("/")
or os.getenv("XAI_BASE_URL", "").strip().rstrip("/")
or getattr(entry, "runtime_base_url", None)
or getattr(entry, "base_url", None)
or DEFAULT_XAI_OAUTH_BASE_URL
).strip().rstrip("/")
if api_key and base_url:
return api_key, base_url
except Exception as exc:
logger.debug("Auxiliary xAI OAuth pool credential resolution failed: %s", exc)
try:
from hermes_cli.auth import resolve_xai_oauth_runtime_credentials
creds = resolve_xai_oauth_runtime_credentials()
except Exception as exc:
logger.debug("Auxiliary xAI OAuth runtime credential resolution failed: %s", exc)
return None
api_key = str(creds.get("api_key") or "").strip()
base_url = str(creds.get("base_url") or "").strip().rstrip("/")
if not api_key or not base_url:
return None
return api_key, base_url
def _read_codex_access_token() -> Optional[str]:
"""Read a valid, non-expired Codex OAuth access token from Hermes auth store.
@ -1348,6 +1415,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
extra["default_headers"] = build_nvidia_nim_headers(base_url)
else:
try:
from providers import get_provider_profile as _gpf_aux
@ -1383,6 +1452,8 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
extra["default_headers"] = build_nvidia_nim_headers(base_url)
else:
try:
from providers import get_provider_profile as _gpf_aux2
@ -1402,7 +1473,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
def _try_openrouter(explicit_api_key: str = None, model: str = None) -> Tuple[Optional[OpenAI], Optional[str]]:
pool_present, entry = _select_pool_entry("openrouter")
if pool_present:
or_key = explicit_api_key or _pool_runtime_api_key(entry)
@ -1412,7 +1483,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
base_url = _pool_runtime_base_url(entry, OPENROUTER_BASE_URL) or OPENROUTER_BASE_URL
logger.debug("Auxiliary client: OpenRouter via pool")
return OpenAI(api_key=or_key, base_url=base_url,
default_headers=build_or_headers()), _OPENROUTER_MODEL
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
or_key = explicit_api_key or os.getenv("OPENROUTER_API_KEY")
if not or_key:
@ -1420,7 +1491,7 @@ def _try_openrouter(explicit_api_key: str = None) -> Tuple[Optional[OpenAI], Opt
return None, None
logger.debug("Auxiliary client: OpenRouter")
return OpenAI(api_key=or_key, base_url=OPENROUTER_BASE_URL,
default_headers=build_or_headers()), _OPENROUTER_MODEL
default_headers=build_or_headers()), model or _OPENROUTER_MODEL
def _describe_openrouter_unavailable() -> str:
@ -1744,6 +1815,32 @@ def _try_custom_endpoint() -> Tuple[Optional[Any], Optional[str]]:
return _fallback_client, model
def _build_xai_oauth_aux_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
"""Build a CodexAuxiliaryClient for an xAI Grok OAuth-authenticated session.
xAI's ``/v1/responses`` endpoint speaks the OpenAI Responses API, so we
wrap a plain ``OpenAI`` client in ``CodexAuxiliaryClient`` to translate
``chat.completions.create()`` calls into ``responses.stream()`` requests.
The caller must pass an explicit model pinning a default for Grok
would silently rot when xAI's allowlist drifts. Returns ``(None, None)``
when the user has not authenticated with xAI Grok OAuth.
"""
if not model:
logger.warning(
"Auxiliary client: xai-oauth requested without a model; "
"pass model explicitly (auxiliary.<task>.model in config.yaml)."
)
return None, None
resolved = _resolve_xai_oauth_for_aux()
if resolved is None:
return None, None
api_key, base_url = resolved
logger.debug("Auxiliary client: xAI OAuth (%s via Responses API)", model)
real_client = OpenAI(api_key=api_key, base_url=base_url)
return CodexAuxiliaryClient(real_client, model), model
def _build_codex_client(model: str) -> Tuple[Optional[Any], Optional[str]]:
"""Build a CodexAuxiliaryClient for an explicitly-requested model.
@ -2640,6 +2737,8 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
)
elif base_url_host_matches(sync_base_url, "api.kimi.com"):
async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
elif base_url_host_matches(sync_base_url, "integrate.api.nvidia.com"):
async_kwargs["default_headers"] = build_nvidia_nim_headers(sync_base_url)
else:
# Fall back to profile.default_headers for providers that declare
# client-level headers on their ProviderProfile (e.g. attribution
@ -2851,6 +2950,26 @@ def resolve_provider_client(
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
# ── xAI Grok OAuth (loopback PKCE → Responses API) ───────────────
# Without this branch, an xai-oauth main provider falls through to the
# generic ``oauth_external`` arm below and returns ``(None, None)``,
# silently re-routing every auxiliary task (compression, web extract,
# session search, curator, etc.) to whatever Step-2 fallback the user
# has configured. Users on xAI Grok OAuth would then see surprise
# OpenRouter / Nous bills for side tasks they thought were running on
# their xAI subscription.
if provider == "xai-oauth":
client, default = _build_xai_oauth_aux_client(model)
if client is None:
logger.warning(
"resolve_provider_client: xai-oauth requested but no xAI "
"OAuth token found (run: hermes model -> xAI Grok OAuth — SuperGrok Subscription)"
)
return None, None
final_model = _normalize_resolved_model(model or default, provider)
return (_to_async_client(client, final_model, is_vision=is_vision) if async_mode
else (client, final_model))
# ── Custom endpoint (OPENAI_BASE_URL + OPENAI_API_KEY) ───────────
if provider == "custom":
if explicit_base_url:
@ -2881,6 +3000,8 @@ def resolve_provider_client(
extra["default_headers"] = copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
)
elif base_url_host_matches(custom_base, "integrate.api.nvidia.com"):
extra["default_headers"] = build_nvidia_nim_headers(custom_base)
else:
# Fall back to profile.default_headers for providers that
# declare client-level attribution headers on their profile.
@ -2928,10 +3049,17 @@ def resolve_provider_client(
if custom_entry:
custom_base = custom_entry.get("base_url", "").strip()
custom_key = custom_entry.get("api_key", "").strip()
custom_key_env = custom_entry.get("key_env", "").strip()
custom_key_env = (custom_entry.get("key_env") or custom_entry.get("api_key_env") or "").strip()
if not custom_key and custom_key_env:
custom_key = os.getenv(custom_key_env, "").strip()
custom_key = custom_key or "no-key-required"
if custom_key == "no-key-required":
logger.warning(
"resolve_provider_client: named custom provider %r has no resolvable "
"api_key — request will be sent with placeholder no-key-required "
"and will 401 on auth-required endpoints",
custom_entry.get("name") or provider,
)
# An explicit per-task api_mode override (from _resolve_task_provider_model)
# wins; otherwise fall back to what the provider entry declared.
entry_api_mode = (api_mode or custom_entry.get("api_mode") or "").strip()
@ -3079,6 +3207,8 @@ def resolve_provider_client(
headers.update(copilot_request_headers(
is_agent_turn=True, is_vision=is_vision
))
elif base_url_host_matches(base_url, "integrate.api.nvidia.com"):
headers.update(build_nvidia_nim_headers(base_url))
else:
# Fall back to profile.default_headers for providers that declare
# client-level attribution headers on their profile (e.g. GMI
@ -3201,6 +3331,8 @@ def resolve_provider_client(
return resolve_provider_client("nous", model, async_mode)
if provider == "openai-codex":
return resolve_provider_client("openai-codex", model, async_mode)
if provider == "xai-oauth":
return resolve_provider_client("xai-oauth", model, async_mode)
# Other OAuth providers not directly supported
logger.warning("resolve_provider_client: OAuth provider %s not "
"directly supported, try 'auto'", provider)
@ -3275,7 +3407,7 @@ def _resolve_strict_vision_backend(
if provider == "copilot":
return resolve_provider_client("copilot", model, is_vision=True)
if provider == "openrouter":
return _try_openrouter()
return _try_openrouter(model=model)
if provider == "nous":
return _try_nous(vision=True)
if provider == "openai-codex":

View file

@ -244,8 +244,21 @@ def _normalize_responses_message_status(value: Any, *, default: str = "completed
return default
def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items."""
def _chat_messages_to_responses_input(
messages: List[Dict[str, Any]],
*,
is_xai_responses: bool = False,
) -> List[Dict[str, Any]]:
"""Convert internal chat-style messages to Responses input items.
``is_xai_responses=True`` strips ``encrypted_content`` from replayed
reasoning items. xAI's OAuth/SuperGrok ``/v1/responses`` surface
rejects encrypted reasoning blobs minted by prior turns: the request
streams an ``error`` SSE frame before ``response.created`` and the
OpenAI SDK collapses it into a generic stream-ordering error. Native
Codex (chatgpt.com backend-api) DOES accept replayed encrypted_content
keep the default off.
"""
items: List[Dict[str, Any]] = []
seen_item_ids: set = set()
@ -271,9 +284,17 @@ def _chat_messages_to_responses_input(messages: List[Dict[str, Any]]) -> List[Di
if role == "assistant":
# Replay encrypted reasoning items from previous turns
# so the API can maintain coherent reasoning chains.
#
# xAI OAuth (SuperGrok/Premium) rejects replayed
# ``encrypted_content`` reasoning items minted by prior
# turns — see _chat_messages_to_responses_input docstring.
# When ``is_xai_responses`` is set we drop the replay
# entirely; Grok still reasons on each turn server-side,
# we just don't try to thread the prior turn's encrypted
# blob back in.
codex_reasoning = msg.get("codex_reasoning_items")
has_codex_reasoning = False
if isinstance(codex_reasoning, list):
if isinstance(codex_reasoning, list) and not is_xai_responses:
for ri in codex_reasoning:
if isinstance(ri, dict) and ri.get("encrypted_content"):
item_id = ri.get("id")
@ -726,7 +747,7 @@ def _preflight_codex_api_kwargs(
"model", "instructions", "input", "tools", "store",
"reasoning", "include", "max_output_tokens", "temperature",
"tool_choice", "parallel_tool_calls", "prompt_cache_key", "service_tier",
"extra_headers",
"extra_headers", "extra_body",
}
normalized: Dict[str, Any] = {
"model": model,
@ -776,6 +797,19 @@ def _preflight_codex_api_kwargs(
if normalized_headers:
normalized["extra_headers"] = normalized_headers
extra_body = api_kwargs.get("extra_body")
if extra_body is not None:
if not isinstance(extra_body, dict):
raise ValueError("Codex Responses request 'extra_body' must be an object.")
# Pass extra_body through verbatim — used by xAI Responses to
# carry `prompt_cache_key` as a body-level field (the documented
# cache-routing surface on /v1/responses). The openai SDK
# serializes extra_body into the JSON body without per-field
# type checks, so it survives Responses.stream() kwarg-signature
# changes that would otherwise raise TypeError before the wire.
if extra_body:
normalized["extra_body"] = dict(extra_body)
if allow_stream:
stream = api_kwargs.get("stream")
if stream is not None and stream is not True:

View file

@ -221,6 +221,114 @@ def _truncate_tool_call_args_json(args: str, head_chars: int = 200) -> str:
return json.dumps(shrunken, ensure_ascii=False)
_IMAGE_PART_TYPES = frozenset({"image_url", "input_image", "image"})
def _is_image_part(part: Any) -> bool:
"""True if ``part`` is a multimodal image content block.
Recognizes all three shapes the agent handles:
- OpenAI chat.completions: ``{"type": "image_url", "image_url": ...}``
- OpenAI Responses API: ``{"type": "input_image", "image_url": "..."}``
- Anthropic native: ``{"type": "image", "source": {...}}``
"""
if not isinstance(part, dict):
return False
return part.get("type") in _IMAGE_PART_TYPES
def _content_has_images(content: Any) -> bool:
"""True if a message's ``content`` is a multimodal list with image parts."""
if not isinstance(content, list):
return False
return any(_is_image_part(p) for p in content)
def _strip_images_from_content(content: Any) -> Any:
"""Return a copy of ``content`` with every image part replaced by a
short text placeholder.
- String content is returned unchanged.
- Non-list, non-string content is returned unchanged.
- List content: image parts become ``{"type": "text", "text": "[Attached
image stripped after compression]"}``; other parts are preserved as-is.
Input is never mutated.
"""
if not isinstance(content, list):
return content
if not any(_is_image_part(p) for p in content):
return content
new_parts: List[Any] = []
for p in content:
if _is_image_part(p):
new_parts.append({
"type": "text",
"text": "[Attached image — stripped after compression]",
})
else:
new_parts.append(p)
return new_parts
def _strip_historical_media(messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
"""Replace image parts in older messages with placeholder text.
The anchor is the *last* user message that has any image content. Every
message before that anchor gets its image parts replaced with a short
placeholder so the outgoing request stops re-shipping the same multi-MB
base-64 image blobs on every turn.
If no user message carries images, the list is returned unchanged.
If the only user message with images is the very first one (nothing
earlier to strip), the list is returned unchanged.
Shallow copies of touched messages only; input is never mutated.
Port of Kilo-Org/kilocode#9434 (adapted for the OpenAI-style message
shape the hermes compressor emits).
"""
if not messages:
return messages
# Find the newest user message that carries at least one image part.
# We anchor on image-bearing user messages (not all user messages) so
# a plain text follow-up after a big-image turn still strips the old
# image — matching the problem kilocode#9434 set out to solve.
anchor = -1
for i in range(len(messages) - 1, -1, -1):
msg = messages[i]
if not isinstance(msg, dict):
continue
if msg.get("role") != "user":
continue
if _content_has_images(msg.get("content")):
anchor = i
break
if anchor <= 0:
# No image-bearing user message, or it's the very first message —
# nothing before it to strip.
return messages
changed = False
result: List[Dict[str, Any]] = []
for i, msg in enumerate(messages):
if i >= anchor or not isinstance(msg, dict):
result.append(msg)
continue
content = msg.get("content")
if not _content_has_images(content):
result.append(msg)
continue
new_msg = msg.copy()
new_msg["content"] = _strip_images_from_content(content)
result.append(new_msg)
changed = True
return result if changed else messages
def _summarize_tool_result(tool_name: str, tool_args: str, tool_content: str) -> str:
"""Create an informative 1-line summary of a tool call + result.
@ -1559,6 +1667,14 @@ The user has requested that this compaction PRIORITISE preserving all informatio
compressed = self._sanitize_tool_pairs(compressed)
# Replace image parts in all compressed messages before the newest
# image-bearing user turn with a short text placeholder. Without
# this, tail messages keep their original multi-MB base-64 image
# payloads forever, which can push every subsequent API request
# past the provider's body-size limit and wedge the session.
# Port of Kilo-Org/kilocode#9434.
compressed = _strip_historical_media(compressed)
new_estimate = estimate_messages_tokens_rough(compressed)
saved_estimate = display_tokens - new_estimate

View file

@ -30,6 +30,28 @@ _DEFAULT_TIMEOUT_SECONDS = 900.0
_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
# Stderr fingerprint of the deprecated `gh copilot` CLI extension
# (https://github.blog/changelog/2025-09-25-upcoming-deprecation-of-gh-copilot-cli-extension).
# We require BOTH the literal product name ("gh-copilot") AND a deprecation
# marker, so generic stderr from the NEW `@github/copilot` CLI — whose repo
# is github.com/github/copilot-cli and which legitimately mentions "copilot-cli"
# in its own banners and error messages — doesn't get misclassified as the
# deprecated extension.
_DEPRECATION_REQUIRED = ("gh-copilot",)
_DEPRECATION_MARKERS = (
"has been deprecated",
"no commands will be executed",
)
def _is_gh_copilot_deprecation_message(stderr_text: str) -> bool:
"""True iff stderr looks like the deprecated gh-copilot extension's banner."""
lower = stderr_text.lower()
if not any(req in lower for req in _DEPRECATION_REQUIRED):
return False
return any(marker in lower for marker in _DEPRECATION_MARKERS)
def _resolve_command() -> str:
return (
@ -506,6 +528,21 @@ class CopilotACPClient:
stderr_text = "\n".join(stderr_tail).strip()
if proc.poll() is not None and stderr_text:
if _is_gh_copilot_deprecation_message(stderr_text):
raise RuntimeError(
"Hermes ACP mode requires the NEW GitHub Copilot CLI "
"(github.com/github/copilot-cli), but the binary it just "
"spawned is the deprecated `gh copilot` extension.\n\n"
"Install the new CLI:\n"
" npm install -g @github/copilot\n"
" # then verify with: copilot --help\n\n"
"If `copilot` already resolves to the new CLI but you still see this,\n"
"point Hermes at it explicitly:\n"
" export HERMES_COPILOT_ACP_COMMAND=/path/to/new/copilot\n\n"
"Alternative: use the `copilot` provider (no ACP, hits the Copilot API\n"
"directly with a Copilot subscription token) via `hermes setup`.\n\n"
f"Original error:\n{stderr_text}"
)
raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")

View file

@ -29,6 +29,7 @@ from hermes_cli.auth import (
_resolve_zai_base_url,
_save_auth_store,
_save_provider_state,
_store_provider_state,
read_credential_pool,
write_credential_pool,
)
@ -128,6 +129,9 @@ class PooledCredential:
def from_dict(cls, provider: str, payload: Dict[str, Any]) -> "PooledCredential":
field_names = {f.name for f in fields(cls) if f.name != "provider"}
data = {k: payload.get(k) for k in field_names if k in payload}
# Rehydrated last_status_at may be an ISO string from to_dict() — normalize to float epoch
if "last_status_at" in data and isinstance(data["last_status_at"], str):
data["last_status_at"] = _parse_absolute_timestamp(data["last_status_at"])
extra = {k: payload[k] for k in _EXTRA_KEYS if k in payload and payload[k] is not None}
data["extra"] = extra
data.setdefault("id", uuid.uuid4().hex[:6])
@ -539,6 +543,64 @@ class CredentialPool:
logger.debug("Failed to sync Codex entry from auth.json: %s", exc)
return entry
def _sync_xai_oauth_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
"""Sync an xAI OAuth pool entry from auth.json if tokens differ.
xAI OAuth refresh tokens are single-use. When another Hermes process
(or another profile sharing the same auth.json) refreshes the token,
it writes the new pair to ``providers["xai-oauth"]["tokens"]`` under
``_auth_store_lock``. Without this resync, our in-memory pool entry
keeps the consumed refresh_token and the next ``_refresh_entry`` call
would replay it and get a ``refresh_token_reused``-style 4xx.
Only applies to entries seeded from the singleton (``loopback_pkce``);
manually added entries (``manual:xai_pkce``) are independent
credentials with their own refresh-token lifecycle.
"""
if self.provider != "xai-oauth" or entry.source != "loopback_pkce":
return entry
try:
with _auth_store_lock():
auth_store = _load_auth_store()
state = _load_provider_state(auth_store, "xai-oauth")
if not isinstance(state, dict):
return entry
tokens = state.get("tokens")
if not isinstance(tokens, dict):
return entry
store_access = tokens.get("access_token", "")
store_refresh = tokens.get("refresh_token", "")
entry_access = entry.access_token or ""
entry_refresh = entry.refresh_token or ""
if store_access and (
store_access != entry_access
or (store_refresh and store_refresh != entry_refresh)
):
logger.debug(
"Pool entry %s: syncing xAI OAuth tokens from auth.json "
"(refreshed by another process)",
entry.id,
)
field_updates: Dict[str, Any] = {
"access_token": store_access,
"refresh_token": store_refresh or entry.refresh_token,
"last_status": None,
"last_status_at": None,
"last_error_code": None,
"last_error_reason": None,
"last_error_message": None,
"last_error_reset_at": None,
}
if state.get("last_refresh"):
field_updates["last_refresh"] = state["last_refresh"]
updated = replace(entry, **field_updates)
self._replace_entry(entry, updated)
self._persist()
return updated
except Exception as exc:
logger.debug("Failed to sync xAI OAuth entry from auth.json: %s", exc)
return entry
def _sync_nous_entry_from_auth_store(self, entry: PooledCredential) -> PooledCredential:
"""Sync a Nous pool entry from auth.json if tokens differ.
@ -604,9 +666,22 @@ class CredentialPool:
re-seeding a consumed single-use refresh token.
Applies to any OAuth provider whose singleton lives in auth.json
(currently Nous and OpenAI Codex).
(currently Nous, OpenAI Codex, and xAI Grok OAuth).
``set_active=False`` on every write: a pool sync-back is a
token-rotation side effect, not the user choosing a provider.
Using ``_save_provider_state`` (which sets ``active_provider``)
here would mean every Nous/Codex/xAI refresh in a multi-provider
setup silently flips the ``active_provider`` flag the next
``hermes`` invocation that defaults to the active provider
(e.g. setup wizard, ``hermes auth status``) would land on
whatever provider happened to refresh last, not whatever the
user actually chose.
"""
if entry.source != "device_code":
# Only sync entries that were seeded *from* a singleton. Manually
# added pool entries (source="manual:*") are independent credentials
# and must not write back to the singleton.
if entry.source not in {"device_code", "loopback_pkce"}:
return
try:
with _auth_store_lock():
@ -632,7 +707,7 @@ class CredentialPool:
state[extra_key] = val
if entry.inference_base_url:
state["inference_base_url"] = entry.inference_base_url
_save_provider_state(auth_store, "nous", state)
_store_provider_state(auth_store, "nous", state, set_active=False)
elif self.provider == "openai-codex":
state = _load_provider_state(auth_store, "openai-codex")
@ -646,7 +721,21 @@ class CredentialPool:
tokens["refresh_token"] = entry.refresh_token
if entry.last_refresh:
state["last_refresh"] = entry.last_refresh
_save_provider_state(auth_store, "openai-codex", state)
_store_provider_state(auth_store, "openai-codex", state, set_active=False)
elif self.provider == "xai-oauth":
state = _load_provider_state(auth_store, "xai-oauth")
if not isinstance(state, dict):
return
tokens = state.get("tokens")
if not isinstance(tokens, dict):
return
tokens["access_token"] = entry.access_token
if entry.refresh_token:
tokens["refresh_token"] = entry.refresh_token
if entry.last_refresh:
state["last_refresh"] = entry.last_refresh
_store_provider_state(auth_store, "xai-oauth", state, set_active=False)
else:
return
@ -699,6 +788,25 @@ class CredentialPool:
refresh_token=refreshed["refresh_token"],
last_refresh=refreshed.get("last_refresh"),
)
elif self.provider == "xai-oauth":
# Adopt fresher tokens from auth.json before spending the
# refresh_token — single-use tokens consumed by another
# process (or another profile sharing the singleton) would
# otherwise trigger ``refresh_token_reused`` on the next
# POST. Only meaningful for singleton-seeded entries.
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
if synced is not entry:
entry = synced
refreshed = auth_mod.refresh_xai_oauth_pure(
entry.access_token,
entry.refresh_token,
)
updated = replace(
entry,
access_token=refreshed["access_token"],
refresh_token=refreshed["refresh_token"],
last_refresh=refreshed.get("last_refresh"),
)
elif self.provider == "nous":
synced = self._sync_nous_entry_from_auth_store(entry)
if synced is not entry:
@ -777,6 +885,30 @@ class CredentialPool:
# Credentials file had a valid (non-expired) token — use it directly
logger.debug("Credentials file has valid token, using without refresh")
return synced
# For xai-oauth: same race as nous — another process may have
# consumed the refresh token between our proactive sync and the
# HTTP call. Re-check auth.json and adopt the fresh tokens if
# they have rotated since. Only meaningful for singleton-seeded
# (loopback_pkce) entries; manual entries don't share state with
# the singleton.
if self.provider == "xai-oauth":
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
if synced.refresh_token != entry.refresh_token:
logger.debug(
"xAI OAuth refresh failed but auth.json has newer tokens — adopting"
)
updated = replace(
synced,
last_status=STATUS_OK,
last_status_at=None,
last_error_code=None,
last_error_reason=None,
last_error_message=None,
last_error_reset_at=None,
)
self._replace_entry(synced, updated)
self._persist()
return updated
# For nous: another process may have consumed the refresh token
# between our proactive sync and the HTTP call. Re-sync from
# auth.json and adopt the fresh tokens if available.
@ -829,6 +961,11 @@ class CredentialPool:
entry.access_token,
CODEX_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
)
if self.provider == "xai-oauth":
return auth_mod._xai_access_token_is_expiring(
entry.access_token,
auth_mod.XAI_ACCESS_TOKEN_REFRESH_SKEW_SECONDS,
)
if self.provider == "nous":
# Nous refresh/mint can require network access and should happen when
# runtime credentials are actually resolved, not merely when the pool
@ -883,6 +1020,17 @@ class CredentialPool:
if synced is not entry:
entry = synced
cleared_any = True
# For xai-oauth singleton-seeded entries, identical pattern:
# an entry frozen as exhausted may simply be holding stale
# tokens that another process (or a fresh `hermes model` ->
# xAI Grok OAuth login) has since rotated in auth.json.
if (self.provider == "xai-oauth"
and entry.source == "loopback_pkce"
and entry.last_status == STATUS_EXHAUSTED):
synced = self._sync_xai_oauth_entry_from_auth_store(entry)
if synced is not entry:
entry = synced
cleared_any = True
if entry.last_status == STATUS_EXHAUSTED:
exhausted_until = _exhausted_until(entry)
if exhausted_until is not None and now < exhausted_until:
@ -1394,6 +1542,37 @@ def _seed_from_singletons(provider: str, entries: List[PooledCredential]) -> Tup
},
)
elif provider == "xai-oauth":
# When the user logs in via ``hermes model`` -> xAI Grok OAuth,
# tokens are written to the auth.json singleton
# (``providers["xai-oauth"]``). Surface them in the pool too so
# ``hermes auth list`` reflects the logged-in state and so the pool
# is the single source of truth for refresh during runtime resolution.
if _is_suppressed(provider, "loopback_pkce"):
return changed, active_sources
state = _load_provider_state(auth_store, "xai-oauth")
tokens = state.get("tokens") if isinstance(state, dict) else None
if isinstance(tokens, dict) and tokens.get("access_token"):
active_sources.add("loopback_pkce")
from hermes_cli.auth import DEFAULT_XAI_OAUTH_BASE_URL
base_url = DEFAULT_XAI_OAUTH_BASE_URL
changed |= _upsert_entry(
entries,
provider,
"loopback_pkce",
{
"source": "loopback_pkce",
"auth_type": AUTH_TYPE_OAUTH,
"access_token": tokens.get("access_token", ""),
"refresh_token": tokens.get("refresh_token"),
"base_url": base_url,
"last_refresh": state.get("last_refresh"),
"label": label_from_token(tokens.get("access_token", ""), "loopback_pkce"),
},
)
return changed, active_sources

View file

@ -265,6 +265,31 @@ def _remove_minimax_oauth(provider: str, removed) -> RemovalResult:
return result
def _remove_xai_oauth_loopback_pkce(provider: str, removed) -> RemovalResult:
"""xAI OAuth tokens live in auth.json providers.xai-oauth — clear them.
Without this step, ``hermes auth remove xai-oauth <N>`` silently undoes
itself: the central dispatcher only removes the in-memory pool entry,
leaves ``providers.xai-oauth`` in auth.json intact, and on the next
``load_pool("xai-oauth")`` call ``_seed_from_singletons`` re-seeds the
entry from the still-present singleton credentials reappear with no
user feedback. Clearing the singleton in step with the suppression set
by the central dispatcher makes the removal stick.
Belt-and-braces against the manual entry path: ``hermes auth add
xai-oauth`` produces a ``manual:xai_pkce`` entry whose removal step
falls through to "unregistered → nothing to clean up" (correct
manual entries are pool-only).
"""
result = RemovalResult()
if _clear_auth_store_provider(provider):
result.cleaned.append(f"Cleared {provider} OAuth tokens from auth store")
result.hints.append(
"Run `hermes model` → xAI Grok OAuth (SuperGrok Subscription) to re-authenticate if needed."
)
return result
def _remove_codex_device_code(provider: str, removed) -> RemovalResult:
"""Codex tokens live in TWO places: our auth store AND ~/.codex/auth.json.
@ -397,6 +422,11 @@ def _register_all_sources() -> None:
remove_fn=_remove_codex_device_code,
description="auth.json providers.openai-codex + ~/.codex/auth.json",
))
register(RemovalStep(
provider="xai-oauth", source_id="loopback_pkce",
remove_fn=_remove_xai_oauth_loopback_pkce,
description="auth.json providers.xai-oauth",
))
register(RemovalStep(
provider="qwen-oauth", source_id="qwen-cli",
remove_fn=_remove_qwen_cli,

View file

@ -107,9 +107,14 @@ class _BackgroundLoop:
Returns the coroutine's result, or raises its exception.
"""
from agent.async_utils import safe_schedule_threadsafe
if self._loop is None:
if asyncio.iscoroutine(coro):
coro.close()
raise RuntimeError("background loop not started")
fut: ConcurrentFuture = asyncio.run_coroutine_threadsafe(coro, self._loop)
fut = safe_schedule_threadsafe(coro, self._loop)
if fut is None:
raise RuntimeError("background loop not running")
try:
return fut.result(timeout=timeout)
except Exception:

View file

@ -213,6 +213,7 @@ DEFAULT_CONTEXT_LENGTHS = {
"grok-2-vision": 8192, # grok-2-vision, -1212, -latest
"grok-4-fast": 2000000, # grok-4-fast-(non-)reasoning
"grok-4.20": 2000000, # grok-4.20-0309-(non-)reasoning, -multi-agent-0309
"grok-4.3": 1000000, # grok-4.3, grok-4.3-latest — 1M context per docs.x.ai
"grok-4": 256000, # grok-4, grok-4-0709
"grok-3": 131072, # grok-3, grok-3-mini, grok-3-fast, grok-3-mini-fast
"grok-2": 131072, # grok-2, grok-2-1212, grok-2-latest
@ -357,6 +358,12 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"api.deepseek.com": "deepseek",
"api.githubcopilot.com": "copilot",
"models.github.ai": "copilot",
# GitHub Models free tier (Azure-hosted prototyping endpoint) — same
# canonical provider as the Copilot API. Hard per-request token cap
# (often 8K) makes it unusable for Hermes' system prompt, but mapping
# it here lets us recognize the endpoint and emit a targeted hint
# instead of falling through the unknown-custom-endpoint path.
"models.inference.ai.azure.com": "copilot",
"api.fireworks.ai": "fireworks",
"opencode.ai": "opencode-go",
"api.x.ai": "xai",

View file

@ -15,6 +15,18 @@ and MoonshotAI/kimi-cli#1595:
2. When ``anyOf`` is used, ``type`` must be on the ``anyOf`` children, not
the parent. Presence of both causes "type should be defined in anyOf
items instead of the parent schema".
3. ``enum`` arrays on scalar-typed nodes may not contain ``null`` or empty
strings. Strip those entries (drop the enum entirely if it becomes empty).
4. ``$ref`` nodes may not carry sibling keywords. Moonshot expands the
reference before validation and then rejects the node if sibling keys
like ``description`` remain on the same node as ``$ref``. Strip every
sibling from ``$ref`` nodes so only ``{"$ref": "..."}`` survives.
(Ported from anomalyco/opencode#24730.)
5. ``items`` may not be a tuple-style array (``items: [schemaA, schemaB]``
for positional element schemas). Moonshot's schema engine requires a
single object schema applied to every array element. Collapse tuple
``items`` to the first element schema (or ``{}`` if the tuple is empty).
(Ported from anomalyco/opencode#24730.)
The ``#/definitions/...`` → ``#/$defs/...`` rewrite for draft-07 refs is
handled separately in ``tools/mcp_tool._normalize_mcp_input_schema`` so it
@ -66,6 +78,16 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
}
elif key in _SCHEMA_LIST_KEYS and isinstance(value, list):
repaired[key] = [_repair_schema(v, is_schema=True) for v in value]
elif key == "items" and isinstance(value, list):
# Rule 5: tuple-style ``items`` arrays (positional element
# schemas) are not accepted by Moonshot. Collapse to the
# first element schema if present, else to ``{}``. This
# matches opencode's behaviour for moonshotai / kimi models.
first = value[0] if value else {}
if isinstance(first, dict):
repaired[key] = _repair_schema(first, is_schema=True)
else:
repaired[key] = first
elif key in _SCHEMA_NODE_KEYS:
# items / not / additionalProperties: single nested schema.
# additionalProperties can also be a bool — leave those alone.
@ -130,6 +152,15 @@ def _repair_schema(node: Any, is_schema: bool = True) -> Any:
else:
repaired.pop("enum")
# Rule 4: $ref nodes must not have sibling keywords. Moonshot expands
# the reference before validation and then rejects the node if siblings
# like ``description`` / ``type`` / ``default`` appear alongside $ref.
# The referenced definition still carries its own description on the
# target node, which Moonshot accepts.
# (Ported from anomalyco/opencode#24730.)
if "$ref" in repaired:
return {"$ref": repaired["$ref"]}
return repaired

View file

@ -425,7 +425,7 @@ def build_skill_invocation_message(
loaded = _load_skill_payload(skill_info["skill_dir"], task_id=task_id)
if not loaded:
return f"[Failed to load skill: {skill_info['name']}]"
return None
loaded_skill, skill_dir, skill_name = loaded

View file

@ -24,7 +24,10 @@ class ResponsesApiTransport(ProviderTransport):
def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any:
"""Convert OpenAI chat messages to Responses API input items."""
from agent.codex_responses_adapter import _chat_messages_to_responses_input
return _chat_messages_to_responses_input(messages)
return _chat_messages_to_responses_input(
messages,
is_xai_responses=bool(kwargs.get("is_xai_responses")),
)
def convert_tools(self, tools: List[Dict[str, Any]]) -> Any:
"""Convert OpenAI tool schemas to Responses API function definitions."""
@ -89,24 +92,38 @@ class ResponsesApiTransport(ProviderTransport):
_effort_clamp = {"minimal": "low"}
reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort)
response_tools = _responses_tools(tools)
kwargs = {
"model": model,
"instructions": instructions,
"input": _chat_messages_to_responses_input(payload_messages),
"tools": _responses_tools(tools),
"tool_choice": "auto",
"parallel_tool_calls": True,
"input": _chat_messages_to_responses_input(
payload_messages,
is_xai_responses=is_xai_responses,
),
"tools": response_tools,
"store": False,
}
if response_tools:
kwargs["tool_choice"] = "auto"
kwargs["parallel_tool_calls"] = True
session_id = params.get("session_id")
if not is_github_responses and session_id:
# xAI Responses takes prompt_cache_key in extra_body (set further
# down); GitHub Models opts out of cache-key routing entirely.
if not is_github_responses and not is_xai_responses and session_id:
kwargs["prompt_cache_key"] = session_id
if reasoning_enabled and is_xai_responses:
from agent.model_metadata import grok_supports_reasoning_effort
kwargs["include"] = ["reasoning.encrypted_content"]
# NOTE: Hermes does NOT ask xAI to return ``reasoning.encrypted_content``
# any more. xAI's OAuth/SuperGrok ``/v1/responses`` surface rejects
# replayed encrypted reasoning items on turn 2+ — see
# _chat_messages_to_responses_input docstring. Requesting the field
# back would just have us cache something we then must strip. Grok
# still reasons natively each turn; coherence across turns rides on
# the visible message text alone.
kwargs["include"] = []
# xAI rejects `reasoning.effort` on grok-4 / grok-4-fast / grok-3
# / grok-code-fast / grok-4.20-0309-* with HTTP 400 even though
# those models reason natively. Only send the effort dial when
@ -165,6 +182,17 @@ class ResponsesApiTransport(ProviderTransport):
merged_extra_headers["x-grok-conv-id"] = session_id
kwargs["extra_headers"] = merged_extra_headers
# xAI Responses cache-routing — body-level field per
# https://docs.x.ai/developers/advanced-api-usage/prompt-caching/maximizing-cache-hits.
# Sent via extra_body (not the typed kwarg) so it survives openai
# SDK builds whose Responses.stream() signature has dropped the field.
existing_extra_body = kwargs.get("extra_body")
merged_extra_body: Dict[str, Any] = {}
if isinstance(existing_extra_body, dict):
merged_extra_body.update(existing_extra_body)
merged_extra_body.setdefault("prompt_cache_key", session_id)
kwargs["extra_body"] = merged_extra_body
return kwargs
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:

View file

@ -14,20 +14,28 @@ the user gets full Hermes capability inside a Codex turn.
Scope (what we expose):
- web_search, web_extract Firecrawl, no codex equivalent
- browser_navigate / _click / _type / Camofox/Browserbase automation
_snapshot / _screenshot / _scroll / _back / _press / _vision
- delegate_task Hermes subagents
_snapshot / _scroll / _back / _press /
_get_images / _console / _vision
- vision_analyze image inspection by vision model
- image_generate image generation
- memory Hermes' persistent memory store
- skill_view, skills_list Hermes' skill library
- session_search cross-session search
- text_to_speech TTS
- kanban_* (complete/block/comment/ kanban worker + orchestrator
heartbeat/show/list/create/ handoff (stateless: read env var,
unblock/link) write ~/.hermes/kanban.db)
What we DO NOT expose (codex has equivalents):
What we DO NOT expose:
- terminal / shell codex's own shell tool
- read_file / write_file / patch codex's apply_patch + shell
- search_files / process codex's shell
- clarify, todo codex's own UX
- clarify codex's own UX
- delegate_task / memory / `_AGENT_LOOP_TOOLS` in Hermes
session_search / todo (model_tools.py). They require
the running AIAgent context to
dispatch (mid-loop state), so a
stateless MCP callback can't
drive them. See the inline
comment on EXPOSED_TOOLS below.
Run with: python -m agent.transports.hermes_tools_mcp_server
Spawned by: CodexAppServerSession.ensure_started() when the runtime is