diff --git a/acp_adapter/copilot_client.py b/acp_adapter/copilot_client.py
new file mode 100644
index 0000000000..e6a08e5975
--- /dev/null
+++ b/acp_adapter/copilot_client.py
@@ -0,0 +1,632 @@
+"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
+
+This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
+backend. Each request starts a short-lived ACP session, sends the formatted
+conversation as a single prompt, collects text chunks, and converts the result
+back into the minimal shape Hermes expects from an OpenAI client.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import queue
+import re
+import shlex
+import subprocess
+import threading
+import time
+from collections import deque
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+from agent.file_safety import get_read_block_error, is_write_denied
+from agent.redact import redact_sensitive_text
+
+ACP_MARKER_BASE_URL = "acp://copilot"
+_DEFAULT_TIMEOUT_SECONDS = 900.0
+
+_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
+_TOOL_CALL_JSON_RE = re.compile(
+ r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}",
+ re.DOTALL,
+)
+
+
+def _resolve_command() -> str:
+ return (
+ os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
+ or os.getenv("COPILOT_CLI_PATH", "").strip()
+ or "copilot"
+ )
+
+
+def _resolve_args() -> list[str]:
+ raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
+ if not raw:
+ return ["--acp", "--stdio"]
+ return shlex.split(raw)
+
+
+def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
+ return {
+ "jsonrpc": "2.0",
+ "id": message_id,
+ "error": {
+ "code": code,
+ "message": message,
+ },
+ }
+
+
+def _permission_denied(message_id: Any) -> dict[str, Any]:
+ return {
+ "jsonrpc": "2.0",
+ "id": message_id,
+ "result": {
+ "outcome": {
+ "outcome": "cancelled",
+ }
+ },
+ }
+
+
+def _format_messages_as_prompt(
+ messages: list[dict[str, Any]],
+ model: str | None = None,
+ tools: list[dict[str, Any]] | None = None,
+ tool_choice: Any = None,
+) -> str:
+ sections: list[str] = [
+ "You are being used as the active ACP agent backend for Hermes.",
+ "Use ACP capabilities to complete tasks.",
+        "IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
+ "If no tool is needed, answer normally.",
+ ]
+ if model:
+ sections.append(f"Hermes requested model hint: {model}")
+
+ if isinstance(tools, list) and tools:
+ tool_specs: list[dict[str, Any]] = []
+ for t in tools:
+ if not isinstance(t, dict):
+ continue
+ fn = t.get("function") or {}
+ if not isinstance(fn, dict):
+ continue
+ name = fn.get("name")
+ if not isinstance(name, str) or not name.strip():
+ continue
+ tool_specs.append(
+ {
+ "name": name.strip(),
+ "description": fn.get("description", ""),
+ "parameters": fn.get("parameters", {}),
+ }
+ )
+ if tool_specs:
+ sections.append(
+ "Available tools (OpenAI function schema). "
+                "When using a tool, emit ONLY <tool_call>...</tool_call> with one JSON object "
+ "containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
+ + json.dumps(tool_specs, ensure_ascii=False)
+ )
+
+ if tool_choice is not None:
+ sections.append(
+ f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}"
+ )
+
+ transcript: list[str] = []
+ for message in messages:
+ if not isinstance(message, dict):
+ continue
+ role = str(message.get("role") or "unknown").strip().lower()
+ if role == "tool":
+ role = "tool"
+ elif role not in {"system", "user", "assistant"}:
+ role = "context"
+
+ content = message.get("content")
+ rendered = _render_message_content(content)
+ if not rendered:
+ continue
+
+ label = {
+ "system": "System",
+ "user": "User",
+ "assistant": "Assistant",
+ "tool": "Tool",
+ "context": "Context",
+ }.get(role, role.title())
+ transcript.append(f"{label}:\n{rendered}")
+
+ if transcript:
+ sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
+
+ sections.append("Continue the conversation from the latest user request.")
+ return "\n\n".join(
+ section.strip() for section in sections if section and section.strip()
+ )
+
+
+def _render_message_content(content: Any) -> str:
+ if content is None:
+ return ""
+ if isinstance(content, str):
+ return content.strip()
+ if isinstance(content, dict):
+ if "text" in content:
+ return str(content.get("text") or "").strip()
+ if "content" in content and isinstance(content.get("content"), str):
+ return str(content.get("content") or "").strip()
+ return json.dumps(content, ensure_ascii=True)
+ if isinstance(content, list):
+ parts: list[str] = []
+ for item in content:
+ if isinstance(item, str):
+ parts.append(item)
+ elif isinstance(item, dict):
+ text = item.get("text")
+ if isinstance(text, str) and text.strip():
+ parts.append(text.strip())
+ return "\n".join(parts).strip()
+ return str(content).strip()
+
+
+def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
+ if not isinstance(text, str) or not text.strip():
+ return [], ""
+
+ extracted: list[SimpleNamespace] = []
+ consumed_spans: list[tuple[int, int]] = []
+
+ def _try_add_tool_call(raw_json: str) -> None:
+ try:
+ obj = json.loads(raw_json)
+ except Exception:
+ return
+ if not isinstance(obj, dict):
+ return
+ fn = obj.get("function")
+ if not isinstance(fn, dict):
+ return
+ fn_name = fn.get("name")
+ if not isinstance(fn_name, str) or not fn_name.strip():
+ return
+ fn_args = fn.get("arguments", "{}")
+ if not isinstance(fn_args, str):
+ fn_args = json.dumps(fn_args, ensure_ascii=False)
+ call_id = obj.get("id")
+ if not isinstance(call_id, str) or not call_id.strip():
+ call_id = f"acp_call_{len(extracted) + 1}"
+
+ extracted.append(
+ SimpleNamespace(
+ id=call_id,
+ call_id=call_id,
+ response_item_id=None,
+ type="function",
+ function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
+ )
+ )
+
+ for m in _TOOL_CALL_BLOCK_RE.finditer(text):
+ raw = m.group(1)
+ _try_add_tool_call(raw)
+ consumed_spans.append((m.start(), m.end()))
+
+ # Only try bare-JSON fallback when no XML blocks were found.
+ if not extracted:
+ for m in _TOOL_CALL_JSON_RE.finditer(text):
+ raw = m.group(0)
+ _try_add_tool_call(raw)
+ consumed_spans.append((m.start(), m.end()))
+
+ if not consumed_spans:
+ return extracted, text.strip()
+
+ consumed_spans.sort()
+ merged: list[tuple[int, int]] = []
+ for start, end in consumed_spans:
+ if not merged or start > merged[-1][1]:
+ merged.append((start, end))
+ else:
+ merged[-1] = (merged[-1][0], max(merged[-1][1], end))
+
+ parts: list[str] = []
+ cursor = 0
+ for start, end in merged:
+ if cursor < start:
+ parts.append(text[cursor:start])
+ cursor = max(cursor, end)
+ if cursor < len(text):
+ parts.append(text[cursor:])
+
+ cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
+ return extracted, cleaned
+
+
+def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
+ candidate = Path(path_text)
+ if not candidate.is_absolute():
+ raise PermissionError("ACP file-system paths must be absolute.")
+ resolved = candidate.resolve()
+ root = Path(cwd).resolve()
+ try:
+ resolved.relative_to(root)
+ except ValueError as exc:
+ raise PermissionError(
+ f"Path '{resolved}' is outside the session cwd '{root}'."
+ ) from exc
+ return resolved
+
+
+class _ACPChatCompletions:
+ def __init__(self, client: CopilotACPClient):
+ self._client = client
+
+ def create(self, **kwargs: Any) -> Any:
+ return self._client._create_chat_completion(**kwargs)
+
+
+class _ACPChatNamespace:
+ def __init__(self, client: CopilotACPClient):
+ self.completions = _ACPChatCompletions(client)
+
+
+class CopilotACPClient:
+ """Minimal OpenAI-client-compatible facade for Copilot ACP."""
+
+ def __init__(
+ self,
+ *,
+ api_key: str | None = None,
+ base_url: str | None = None,
+ default_headers: dict[str, str] | None = None,
+ acp_command: str | None = None,
+ acp_args: list[str] | None = None,
+ acp_cwd: str | None = None,
+ command: str | None = None,
+ args: list[str] | None = None,
+ **_: Any,
+ ):
+ self.api_key = api_key or "copilot-acp"
+ self.base_url = base_url or ACP_MARKER_BASE_URL
+ self._default_headers = dict(default_headers or {})
+ self._acp_command = acp_command or command or _resolve_command()
+ self._acp_args = list(acp_args or args or _resolve_args())
+ self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
+ self.chat = _ACPChatNamespace(self)
+ self.is_closed = False
+ self._active_process: subprocess.Popen[str] | None = None
+ self._active_process_lock = threading.Lock()
+
+ def close(self) -> None:
+ proc: subprocess.Popen[str] | None
+ with self._active_process_lock:
+ proc = self._active_process
+ self._active_process = None
+ self.is_closed = True
+ if proc is None:
+ return
+ try:
+ proc.terminate()
+ proc.wait(timeout=2)
+ except Exception:
+ try:
+ proc.kill()
+ except Exception:
+ pass
+
+ def _create_chat_completion(
+ self,
+ *,
+ model: str | None = None,
+ messages: list[dict[str, Any]] | None = None,
+ timeout: float | None = None,
+ tools: list[dict[str, Any]] | None = None,
+ tool_choice: Any = None,
+ **_: Any,
+ ) -> Any:
+ prompt_text = _format_messages_as_prompt(
+ messages or [],
+ model=model,
+ tools=tools,
+ tool_choice=tool_choice,
+ )
+ # Normalise timeout: run_agent.py may pass an httpx.Timeout object
+ # (used natively by the OpenAI SDK) rather than a plain float.
+ if timeout is None:
+ _effective_timeout = _DEFAULT_TIMEOUT_SECONDS
+ elif isinstance(timeout, (int, float)):
+ _effective_timeout = float(timeout)
+ else:
+ # httpx.Timeout or similar — pick the largest component so the
+ # subprocess has enough wall-clock time for the full response.
+ _candidates = [
+ getattr(timeout, attr, None)
+ for attr in ("read", "write", "connect", "pool", "timeout")
+ ]
+ _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
+ _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
+
+ response_text, reasoning_text = self._run_prompt(
+ prompt_text,
+ timeout_seconds=_effective_timeout,
+ )
+
+ tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
+
+ usage = SimpleNamespace(
+ prompt_tokens=0,
+ completion_tokens=0,
+ total_tokens=0,
+ prompt_tokens_details=SimpleNamespace(cached_tokens=0),
+ )
+ assistant_message = SimpleNamespace(
+ content=cleaned_text,
+ tool_calls=tool_calls,
+ reasoning=reasoning_text or None,
+ reasoning_content=reasoning_text or None,
+ reasoning_details=None,
+ )
+ finish_reason = "tool_calls" if tool_calls else "stop"
+ choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
+ return SimpleNamespace(
+ choices=[choice],
+ usage=usage,
+ model=model or "copilot-acp",
+ )
+
+ def _run_prompt(
+ self, prompt_text: str, *, timeout_seconds: float
+ ) -> tuple[str, str]:
+ try:
+ proc = subprocess.Popen(
+ [self._acp_command] + self._acp_args,
+ stdin=subprocess.PIPE,
+ stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE,
+ text=True,
+ bufsize=1,
+ cwd=self._acp_cwd,
+ )
+ except FileNotFoundError as exc:
+ raise RuntimeError(
+ f"Could not start Copilot ACP command '{self._acp_command}'. "
+ "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
+ ) from exc
+
+ if proc.stdin is None or proc.stdout is None:
+ proc.kill()
+ raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
+
+ self.is_closed = False
+ with self._active_process_lock:
+ self._active_process = proc
+
+ inbox: queue.Queue[dict[str, Any]] = queue.Queue()
+ stderr_tail: deque[str] = deque(maxlen=40)
+
+ def _stdout_reader() -> None:
+ if proc.stdout is None:
+ return
+ for line in proc.stdout:
+ try:
+ inbox.put(json.loads(line))
+ except Exception:
+ inbox.put({"raw": line.rstrip("\n")})
+
+ def _stderr_reader() -> None:
+ if proc.stderr is None:
+ return
+ for line in proc.stderr:
+ stderr_tail.append(line.rstrip("\n"))
+
+ out_thread = threading.Thread(target=_stdout_reader, daemon=True)
+ err_thread = threading.Thread(target=_stderr_reader, daemon=True)
+ out_thread.start()
+ err_thread.start()
+
+ next_id = 0
+
+ def _request(
+ method: str,
+ params: dict[str, Any],
+ *,
+ text_parts: list[str] | None = None,
+ reasoning_parts: list[str] | None = None,
+ ) -> Any:
+ nonlocal next_id
+ next_id += 1
+ request_id = next_id
+ payload = {
+ "jsonrpc": "2.0",
+ "id": request_id,
+ "method": method,
+ "params": params,
+ }
+ assert proc.stdin is not None # always set: Popen(stdin=PIPE)
+ proc.stdin.write(json.dumps(payload) + "\n")
+ proc.stdin.flush()
+
+ deadline = time.time() + timeout_seconds
+ while time.time() < deadline:
+ if proc.poll() is not None:
+ break
+ try:
+ msg = inbox.get(timeout=0.1)
+ except queue.Empty:
+ continue
+
+ if self._handle_server_message(
+ msg,
+ process=proc,
+ cwd=self._acp_cwd,
+ text_parts=text_parts,
+ reasoning_parts=reasoning_parts,
+ ):
+ continue
+
+ if msg.get("id") != request_id:
+ continue
+ if "error" in msg:
+ err = msg.get("error") or {}
+ raise RuntimeError(
+ f"Copilot ACP {method} failed: {err.get('message') or err}"
+ )
+ return msg.get("result")
+
+ stderr_text = "\n".join(stderr_tail).strip()
+ if proc.poll() is not None and stderr_text:
+ raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
+ raise TimeoutError(
+ f"Timed out waiting for Copilot ACP response to {method}."
+ )
+
+ try:
+ _request(
+ "initialize",
+ {
+ "protocolVersion": 1,
+ "clientCapabilities": {
+ "fs": {
+ "readTextFile": True,
+ "writeTextFile": True,
+ }
+ },
+ "clientInfo": {
+ "name": "hermes-agent",
+ "title": "Hermes Agent",
+ "version": "0.0.0",
+ },
+ },
+ )
+ session = (
+ _request(
+ "session/new",
+ {
+ "cwd": self._acp_cwd,
+ "mcpServers": [],
+ },
+ )
+ or {}
+ )
+ session_id = str(session.get("sessionId") or "").strip()
+ if not session_id:
+ raise RuntimeError("Copilot ACP did not return a sessionId.")
+
+ text_parts: list[str] = []
+ reasoning_parts: list[str] = []
+ _request(
+ "session/prompt",
+ {
+ "sessionId": session_id,
+ "prompt": [
+ {
+ "type": "text",
+ "text": prompt_text,
+ }
+ ],
+ },
+ text_parts=text_parts,
+ reasoning_parts=reasoning_parts,
+ )
+ return "".join(text_parts), "".join(reasoning_parts)
+ finally:
+ self.close()
+
+ def _handle_server_message(
+ self,
+ msg: dict[str, Any],
+ *,
+ process: subprocess.Popen[str],
+ cwd: str,
+ text_parts: list[str] | None,
+ reasoning_parts: list[str] | None,
+ ) -> bool:
+ method = msg.get("method")
+ if not isinstance(method, str):
+ return False
+
+ if method == "session/update":
+ params = msg.get("params") or {}
+ update = params.get("update") or {}
+ kind = str(update.get("sessionUpdate") or "").strip()
+ content = update.get("content") or {}
+ chunk_text = ""
+ if isinstance(content, dict):
+ chunk_text = str(content.get("text") or "")
+ if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
+ text_parts.append(chunk_text)
+ elif (
+ kind == "agent_thought_chunk"
+ and chunk_text
+ and reasoning_parts is not None
+ ):
+ reasoning_parts.append(chunk_text)
+ return True
+
+ if process.stdin is None:
+ return True
+
+ message_id = msg.get("id")
+ params = msg.get("params") or {}
+
+ if method == "session/request_permission":
+ response = _permission_denied(message_id)
+ elif method == "fs/read_text_file":
+ try:
+ path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+ block_error = get_read_block_error(str(path))
+ if block_error:
+ raise PermissionError(block_error)
+ content = path.read_text() if path.exists() else ""
+ line = params.get("line")
+ limit = params.get("limit")
+ if isinstance(line, int) and line > 1:
+ lines = content.splitlines(keepends=True)
+ start = line - 1
+ end = (
+ start + limit if isinstance(limit, int) and limit > 0 else None
+ )
+ content = "".join(lines[start:end])
+ if content:
+ content = redact_sensitive_text(content)
+ response = {
+ "jsonrpc": "2.0",
+ "id": message_id,
+ "result": {
+ "content": content,
+ },
+ }
+ except Exception as exc:
+ response = _jsonrpc_error(message_id, -32602, str(exc))
+ elif method == "fs/write_text_file":
+ try:
+ path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
+ if is_write_denied(str(path)):
+ raise PermissionError(
+ f"Write denied: '{path}' is a protected system/credential file."
+ )
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(str(params.get("content") or ""))
+ response = {
+ "jsonrpc": "2.0",
+ "id": message_id,
+ "result": None,
+ }
+ except Exception as exc:
+ response = _jsonrpc_error(message_id, -32602, str(exc))
+ else:
+ response = _jsonrpc_error(
+ message_id,
+ -32601,
+ f"ACP client method '{method}' is not supported by Hermes yet.",
+ )
+
+ process.stdin.write(json.dumps(response) + "\n")
+ process.stdin.flush()
+ return True
diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index 13fb1c8924..6d34dabe55 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -151,23 +151,31 @@ def _fixed_temperature_for_model(
return None
# Default auxiliary models for direct API-key providers (cheap/fast for side tasks)
-_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = {
- "gemini": "gemini-3-flash-preview",
- "zai": "glm-4.5-flash",
- "kimi-coding": "kimi-k2-turbo-preview",
- "stepfun": "step-3.5-flash",
- "kimi-coding-cn": "kimi-k2-turbo-preview",
- "gmi": "google/gemini-3.1-flash-lite-preview",
- "minimax": "MiniMax-M2.7",
- "minimax-cn": "MiniMax-M2.7",
+def _get_aux_model_for_provider(provider_id: str) -> str:
+ """Return the cheap auxiliary model for a provider.
+
+ Reads from ProviderProfile.default_aux_model first, falling back to the
+ legacy hardcoded dict for providers that predate the profiles system.
+ """
+ try:
+ from providers import get_provider_profile
+ _p = get_provider_profile(provider_id)
+ if _p and _p.default_aux_model:
+ return _p.default_aux_model
+ except Exception:
+ pass
+ return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "")
+
+
+# Fallback for providers not yet migrated to ProviderProfile.default_aux_model.
+# New providers should set default_aux_model on their profile instead.
+_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = {
"anthropic": "claude-haiku-4-5-20251001",
- "ai-gateway": "google/gemini-3-flash",
- "opencode-zen": "gemini-3-flash",
- "opencode-go": "glm-5",
- "kilocode": "google/gemini-3-flash-preview",
- "ollama-cloud": "nemotron-3-nano:30b",
}
+# Legacy alias — callers that haven't been updated yet can still use this.
+_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK
+
# Vision-specific model overrides for direct providers.
# When the user's main provider has a dedicated vision/multimodal model that
# differs from their main chat model, map it here. The vision auto-detect
@@ -868,7 +876,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
base_url = _to_openai_base_url(
_pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url
)
- model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+ model = _get_aux_model_for_provider(provider_id) or None
if model is None:
continue # skip provider if we don't know a valid aux model
logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model)
@@ -877,14 +885,22 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
if is_native_gemini_base_url(base_url):
return GeminiNativeClient(api_key=api_key, base_url=base_url), model
- extra = {}
- if base_url_host_matches(base_url, "api.kimi.com"):
- extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
- elif base_url_host_matches(base_url, "api.githubcopilot.com"):
- from hermes_cli.models import copilot_default_headers
+ extra = {}
+ if base_url_host_matches(base_url, "api.kimi.com"):
+ extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"}
+ elif base_url_host_matches(base_url, "api.githubcopilot.com"):
+ from hermes_cli.models import copilot_default_headers
- extra["default_headers"] = copilot_default_headers()
- return OpenAI(api_key=api_key, base_url=base_url, **extra), model
+ extra["default_headers"] = copilot_default_headers()
+ else:
+ try:
+ from providers import get_provider_profile as _gpf_aux
+ _ph_aux = _gpf_aux(provider_id)
+ if _ph_aux and _ph_aux.default_headers:
+ extra["default_headers"] = dict(_ph_aux.default_headers)
+ except Exception:
+ pass
+ return OpenAI(api_key=api_key, base_url=base_url, **extra), model
creds = resolve_api_key_provider_credentials(provider_id)
api_key = str(creds.get("api_key", "")).strip()
@@ -894,7 +910,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
base_url = _to_openai_base_url(
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
)
- model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id)
+ model = _get_aux_model_for_provider(provider_id) or None
if model is None:
continue # skip provider if we don't know a valid aux model
logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model)
@@ -910,6 +926,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]:
from hermes_cli.models import copilot_default_headers
extra["default_headers"] = copilot_default_headers()
+ else:
+ try:
+ from providers import get_provider_profile as _gpf_aux2
+ _ph_aux2 = _gpf_aux2(provider_id)
+ if _ph_aux2 and _ph_aux2.default_headers:
+ extra["default_headers"] = dict(_ph_aux2.default_headers)
+ except Exception:
+ pass
return OpenAI(api_key=api_key, base_url=base_url, **extra), model
return None, None
@@ -1258,7 +1282,7 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]:
from agent.anthropic_adapter import _is_oauth_token
is_oauth = _is_oauth_token(token)
- model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001")
+ model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001"
logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth)
try:
real_client = build_anthropic_client(token, base_url)
@@ -1642,7 +1666,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False):
except ImportError:
pass
try:
- from agent.copilot_acp_client import CopilotACPClient
+ from acp_adapter.copilot_client import CopilotACPClient
if isinstance(sync_client, CopilotACPClient):
return sync_client, model
except ImportError:
@@ -1986,7 +2010,7 @@ def resolve_provider_client(
str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url
)
- default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "")
+ default_model = _get_aux_model_for_provider(provider)
final_model = _normalize_resolved_model(model or default_model, provider)
if provider == "gemini":
@@ -2056,7 +2080,7 @@ def resolve_provider_client(
"process credentials are incomplete"
)
return None, None
- from agent.copilot_acp_client import CopilotACPClient
+ from acp_adapter.copilot_client import CopilotACPClient
client = CopilotACPClient(
api_key=api_key,
diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py
index 94d40d2d97..6ed499c42f 100644
--- a/agent/copilot_acp_client.py
+++ b/agent/copilot_acp_client.py
@@ -1,646 +1,8 @@
-"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
+"""Backward-compatibility shim.
-This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
-backend. Each request starts a short-lived ACP session, sends the formatted
-conversation as a single prompt, collects text chunks, and converts the result
-back into the minimal shape Hermes expects from an OpenAI client.
+CopilotACPClient has moved to acp_adapter/copilot_client.py.
+This module re-exports it so existing callers continue to work.
"""
+from acp_adapter.copilot_client import CopilotACPClient # noqa: F401
-from __future__ import annotations
-
-import json
-import os
-import queue
-import re
-import shlex
-import subprocess
-import threading
-import time
-from collections import deque
-from pathlib import Path
-from types import SimpleNamespace
-from typing import Any
-
-from agent.file_safety import get_read_block_error, is_write_denied
-from agent.redact import redact_sensitive_text
-
-ACP_MARKER_BASE_URL = "acp://copilot"
-_DEFAULT_TIMEOUT_SECONDS = 900.0
-
-_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
-_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL)
-
-
-def _resolve_command() -> str:
- return (
- os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
- or os.getenv("COPILOT_CLI_PATH", "").strip()
- or "copilot"
- )
-
-
-def _resolve_args() -> list[str]:
- raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
- if not raw:
- return ["--acp", "--stdio"]
- return shlex.split(raw)
-
-
-def _resolve_home_dir() -> str:
- """Return a stable HOME for child ACP processes."""
-
- try:
- from hermes_constants import get_subprocess_home
-
- profile_home = get_subprocess_home()
- if profile_home:
- return profile_home
- except Exception:
- pass
-
- home = os.environ.get("HOME", "").strip()
- if home:
- return home
-
- expanded = os.path.expanduser("~")
- if expanded and expanded != "~":
- return expanded
-
- try:
- import pwd
-
- resolved = pwd.getpwuid(os.getuid()).pw_dir.strip()
- if resolved:
- return resolved
- except Exception:
- pass
-
- # Last resort: /tmp (writable on any POSIX system). Avoids crashing the
- # subprocess with no HOME; callers can set HERMES_HOME explicitly if they
- # need a different writable dir.
- return "/tmp"
-
-
-def _build_subprocess_env() -> dict[str, str]:
- env = os.environ.copy()
- env["HOME"] = _resolve_home_dir()
- return env
-
-
-def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
- return {
- "jsonrpc": "2.0",
- "id": message_id,
- "error": {
- "code": code,
- "message": message,
- },
- }
-
-
-def _permission_denied(message_id: Any) -> dict[str, Any]:
- return {
- "jsonrpc": "2.0",
- "id": message_id,
- "result": {
- "outcome": {
- "outcome": "cancelled",
- }
- },
- }
-
-
-def _format_messages_as_prompt(
- messages: list[dict[str, Any]],
- model: str | None = None,
- tools: list[dict[str, Any]] | None = None,
- tool_choice: Any = None,
-) -> str:
- sections: list[str] = [
- "You are being used as the active ACP agent backend for Hermes.",
- "Use ACP capabilities to complete tasks.",
-        "IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
- "If no tool is needed, answer normally.",
- ]
- if model:
- sections.append(f"Hermes requested model hint: {model}")
-
- if isinstance(tools, list) and tools:
- tool_specs: list[dict[str, Any]] = []
- for t in tools:
- if not isinstance(t, dict):
- continue
- fn = t.get("function") or {}
- if not isinstance(fn, dict):
- continue
- name = fn.get("name")
- if not isinstance(name, str) or not name.strip():
- continue
- tool_specs.append(
- {
- "name": name.strip(),
- "description": fn.get("description", ""),
- "parameters": fn.get("parameters", {}),
- }
- )
- if tool_specs:
- sections.append(
- "Available tools (OpenAI function schema). "
-            "When using a tool, emit ONLY <tool_call>...</tool_call> with one JSON object "
- "containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
- + json.dumps(tool_specs, ensure_ascii=False)
- )
-
- if tool_choice is not None:
- sections.append(f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}")
-
- transcript: list[str] = []
- for message in messages:
- if not isinstance(message, dict):
- continue
- role = str(message.get("role") or "unknown").strip().lower()
- if role == "tool":
- role = "tool"
- elif role not in {"system", "user", "assistant"}:
- role = "context"
-
- content = message.get("content")
- rendered = _render_message_content(content)
- if not rendered:
- continue
-
- label = {
- "system": "System",
- "user": "User",
- "assistant": "Assistant",
- "tool": "Tool",
- "context": "Context",
- }.get(role, role.title())
- transcript.append(f"{label}:\n{rendered}")
-
- if transcript:
- sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
-
- sections.append("Continue the conversation from the latest user request.")
- return "\n\n".join(section.strip() for section in sections if section and section.strip())
-
-
-def _render_message_content(content: Any) -> str:
- if content is None:
- return ""
- if isinstance(content, str):
- return content.strip()
- if isinstance(content, dict):
- if "text" in content:
- return str(content.get("text") or "").strip()
- if "content" in content and isinstance(content.get("content"), str):
- return str(content.get("content") or "").strip()
- return json.dumps(content, ensure_ascii=True)
- if isinstance(content, list):
- parts: list[str] = []
- for item in content:
- if isinstance(item, str):
- parts.append(item)
- elif isinstance(item, dict):
- text = item.get("text")
- if isinstance(text, str) and text.strip():
- parts.append(text.strip())
- return "\n".join(parts).strip()
- return str(content).strip()
-
-
-def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
- if not isinstance(text, str) or not text.strip():
- return [], ""
-
- extracted: list[SimpleNamespace] = []
- consumed_spans: list[tuple[int, int]] = []
-
- def _try_add_tool_call(raw_json: str) -> None:
- try:
- obj = json.loads(raw_json)
- except Exception:
- return
- if not isinstance(obj, dict):
- return
- fn = obj.get("function")
- if not isinstance(fn, dict):
- return
- fn_name = fn.get("name")
- if not isinstance(fn_name, str) or not fn_name.strip():
- return
- fn_args = fn.get("arguments", "{}")
- if not isinstance(fn_args, str):
- fn_args = json.dumps(fn_args, ensure_ascii=False)
- call_id = obj.get("id")
- if not isinstance(call_id, str) or not call_id.strip():
- call_id = f"acp_call_{len(extracted)+1}"
-
- extracted.append(
- SimpleNamespace(
- id=call_id,
- call_id=call_id,
- response_item_id=None,
- type="function",
- function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
- )
- )
-
- for m in _TOOL_CALL_BLOCK_RE.finditer(text):
- raw = m.group(1)
- _try_add_tool_call(raw)
- consumed_spans.append((m.start(), m.end()))
-
- # Only try bare-JSON fallback when no XML blocks were found.
- if not extracted:
- for m in _TOOL_CALL_JSON_RE.finditer(text):
- raw = m.group(0)
- _try_add_tool_call(raw)
- consumed_spans.append((m.start(), m.end()))
-
- if not consumed_spans:
- return extracted, text.strip()
-
- consumed_spans.sort()
- merged: list[tuple[int, int]] = []
- for start, end in consumed_spans:
- if not merged or start > merged[-1][1]:
- merged.append((start, end))
- else:
- merged[-1] = (merged[-1][0], max(merged[-1][1], end))
-
- parts: list[str] = []
- cursor = 0
- for start, end in merged:
- if cursor < start:
- parts.append(text[cursor:start])
- cursor = max(cursor, end)
- if cursor < len(text):
- parts.append(text[cursor:])
-
- cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip()
- return extracted, cleaned
-
-
-
-def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path:
- candidate = Path(path_text)
- if not candidate.is_absolute():
- raise PermissionError("ACP file-system paths must be absolute.")
- resolved = candidate.resolve()
- root = Path(cwd).resolve()
- try:
- resolved.relative_to(root)
- except ValueError as exc:
- raise PermissionError(f"Path '{resolved}' is outside the session cwd '{root}'.") from exc
- return resolved
-
-
-class _ACPChatCompletions:
- def __init__(self, client: "CopilotACPClient"):
- self._client = client
-
- def create(self, **kwargs: Any) -> Any:
- return self._client._create_chat_completion(**kwargs)
-
-
-class _ACPChatNamespace:
- def __init__(self, client: "CopilotACPClient"):
- self.completions = _ACPChatCompletions(client)
-
-
-class CopilotACPClient:
- """Minimal OpenAI-client-compatible facade for Copilot ACP."""
-
- def __init__(
- self,
- *,
- api_key: str | None = None,
- base_url: str | None = None,
- default_headers: dict[str, str] | None = None,
- acp_command: str | None = None,
- acp_args: list[str] | None = None,
- acp_cwd: str | None = None,
- command: str | None = None,
- args: list[str] | None = None,
- **_: Any,
- ):
- self.api_key = api_key or "copilot-acp"
- self.base_url = base_url or ACP_MARKER_BASE_URL
- self._default_headers = dict(default_headers or {})
- self._acp_command = acp_command or command or _resolve_command()
- self._acp_args = list(acp_args or args or _resolve_args())
- self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve())
- self.chat = _ACPChatNamespace(self)
- self.is_closed = False
- self._active_process: subprocess.Popen[str] | None = None
- self._active_process_lock = threading.Lock()
-
- def close(self) -> None:
- proc: subprocess.Popen[str] | None
- with self._active_process_lock:
- proc = self._active_process
- self._active_process = None
- self.is_closed = True
- if proc is None:
- return
- try:
- proc.terminate()
- proc.wait(timeout=2)
- except Exception:
- try:
- proc.kill()
- except Exception:
- pass
-
- def _create_chat_completion(
- self,
- *,
- model: str | None = None,
- messages: list[dict[str, Any]] | None = None,
- timeout: float | None = None,
- tools: list[dict[str, Any]] | None = None,
- tool_choice: Any = None,
- **_: Any,
- ) -> Any:
- prompt_text = _format_messages_as_prompt(
- messages or [],
- model=model,
- tools=tools,
- tool_choice=tool_choice,
- )
- # Normalise timeout: run_agent.py may pass an httpx.Timeout object
- # (used natively by the OpenAI SDK) rather than a plain float.
- if timeout is None:
- _effective_timeout = _DEFAULT_TIMEOUT_SECONDS
- elif isinstance(timeout, (int, float)):
- _effective_timeout = float(timeout)
- else:
- # httpx.Timeout or similar — pick the largest component so the
- # subprocess has enough wall-clock time for the full response.
- _candidates = [
- getattr(timeout, attr, None)
- for attr in ("read", "write", "connect", "pool", "timeout")
- ]
- _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))]
- _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS
-
- response_text, reasoning_text = self._run_prompt(
- prompt_text,
- timeout_seconds=_effective_timeout,
- )
-
- tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text)
-
- usage = SimpleNamespace(
- prompt_tokens=0,
- completion_tokens=0,
- total_tokens=0,
- prompt_tokens_details=SimpleNamespace(cached_tokens=0),
- )
- assistant_message = SimpleNamespace(
- content=cleaned_text,
- tool_calls=tool_calls,
- reasoning=reasoning_text or None,
- reasoning_content=reasoning_text or None,
- reasoning_details=None,
- )
- finish_reason = "tool_calls" if tool_calls else "stop"
- choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason)
- return SimpleNamespace(
- choices=[choice],
- usage=usage,
- model=model or "copilot-acp",
- )
-
- def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]:
- try:
- proc = subprocess.Popen(
- [self._acp_command] + self._acp_args,
- stdin=subprocess.PIPE,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- text=True,
- bufsize=1,
- cwd=self._acp_cwd,
- env=_build_subprocess_env(),
- )
- except FileNotFoundError as exc:
- raise RuntimeError(
- f"Could not start Copilot ACP command '{self._acp_command}'. "
- "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH."
- ) from exc
-
- if proc.stdin is None or proc.stdout is None:
- proc.kill()
- raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.")
-
- self.is_closed = False
- with self._active_process_lock:
- self._active_process = proc
-
- inbox: queue.Queue[dict[str, Any]] = queue.Queue()
- stderr_tail: deque[str] = deque(maxlen=40)
-
- def _stdout_reader() -> None:
- if proc.stdout is None:
- return
- for line in proc.stdout:
- try:
- inbox.put(json.loads(line))
- except Exception:
- inbox.put({"raw": line.rstrip("\n")})
-
- def _stderr_reader() -> None:
- if proc.stderr is None:
- return
- for line in proc.stderr:
- stderr_tail.append(line.rstrip("\n"))
-
- out_thread = threading.Thread(target=_stdout_reader, daemon=True)
- err_thread = threading.Thread(target=_stderr_reader, daemon=True)
- out_thread.start()
- err_thread.start()
-
- next_id = 0
-
- def _request(method: str, params: dict[str, Any], *, text_parts: list[str] | None = None, reasoning_parts: list[str] | None = None) -> Any:
- nonlocal next_id
- next_id += 1
- request_id = next_id
- payload = {
- "jsonrpc": "2.0",
- "id": request_id,
- "method": method,
- "params": params,
- }
- proc.stdin.write(json.dumps(payload) + "\n")
- proc.stdin.flush()
-
- deadline = time.time() + timeout_seconds
- while time.time() < deadline:
- if proc.poll() is not None:
- break
- try:
- msg = inbox.get(timeout=0.1)
- except queue.Empty:
- continue
-
- if self._handle_server_message(
- msg,
- process=proc,
- cwd=self._acp_cwd,
- text_parts=text_parts,
- reasoning_parts=reasoning_parts,
- ):
- continue
-
- if msg.get("id") != request_id:
- continue
- if "error" in msg:
- err = msg.get("error") or {}
- raise RuntimeError(
- f"Copilot ACP {method} failed: {err.get('message') or err}"
- )
- return msg.get("result")
-
- stderr_text = "\n".join(stderr_tail).strip()
- if proc.poll() is not None and stderr_text:
- raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}")
- raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.")
-
- try:
- _request(
- "initialize",
- {
- "protocolVersion": 1,
- "clientCapabilities": {
- "fs": {
- "readTextFile": True,
- "writeTextFile": True,
- }
- },
- "clientInfo": {
- "name": "hermes-agent",
- "title": "Hermes Agent",
- "version": "0.0.0",
- },
- },
- )
- session = _request(
- "session/new",
- {
- "cwd": self._acp_cwd,
- "mcpServers": [],
- },
- ) or {}
- session_id = str(session.get("sessionId") or "").strip()
- if not session_id:
- raise RuntimeError("Copilot ACP did not return a sessionId.")
-
- text_parts: list[str] = []
- reasoning_parts: list[str] = []
- _request(
- "session/prompt",
- {
- "sessionId": session_id,
- "prompt": [
- {
- "type": "text",
- "text": prompt_text,
- }
- ],
- },
- text_parts=text_parts,
- reasoning_parts=reasoning_parts,
- )
- return "".join(text_parts), "".join(reasoning_parts)
- finally:
- self.close()
-
- def _handle_server_message(
- self,
- msg: dict[str, Any],
- *,
- process: subprocess.Popen[str],
- cwd: str,
- text_parts: list[str] | None,
- reasoning_parts: list[str] | None,
- ) -> bool:
- method = msg.get("method")
- if not isinstance(method, str):
- return False
-
- if method == "session/update":
- params = msg.get("params") or {}
- update = params.get("update") or {}
- kind = str(update.get("sessionUpdate") or "").strip()
- content = update.get("content") or {}
- chunk_text = ""
- if isinstance(content, dict):
- chunk_text = str(content.get("text") or "")
- if kind == "agent_message_chunk" and chunk_text and text_parts is not None:
- text_parts.append(chunk_text)
- elif kind == "agent_thought_chunk" and chunk_text and reasoning_parts is not None:
- reasoning_parts.append(chunk_text)
- return True
-
- if process.stdin is None:
- return True
-
- message_id = msg.get("id")
- params = msg.get("params") or {}
-
- if method == "session/request_permission":
- response = _permission_denied(message_id)
- elif method == "fs/read_text_file":
- try:
- path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
- block_error = get_read_block_error(str(path))
- if block_error:
- raise PermissionError(block_error)
- content = path.read_text() if path.exists() else ""
- line = params.get("line")
- limit = params.get("limit")
- if isinstance(line, int) and line > 1:
- lines = content.splitlines(keepends=True)
- start = line - 1
- end = start + limit if isinstance(limit, int) and limit > 0 else None
- content = "".join(lines[start:end])
- if content:
- content = redact_sensitive_text(content)
- response = {
- "jsonrpc": "2.0",
- "id": message_id,
- "result": {
- "content": content,
- },
- }
- except Exception as exc:
- response = _jsonrpc_error(message_id, -32602, str(exc))
- elif method == "fs/write_text_file":
- try:
- path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd)
- if is_write_denied(str(path)):
- raise PermissionError(
- f"Write denied: '{path}' is a protected system/credential file."
- )
- path.parent.mkdir(parents=True, exist_ok=True)
- path.write_text(str(params.get("content") or ""))
- response = {
- "jsonrpc": "2.0",
- "id": message_id,
- "result": None,
- }
- except Exception as exc:
- response = _jsonrpc_error(message_id, -32602, str(exc))
- else:
- response = _jsonrpc_error(
- message_id,
- -32601,
- f"ACP client method '{method}' is not supported by Hermes yet.",
- )
-
- process.stdin.write(json.dumps(response) + "\n")
- process.stdin.flush()
- return True
+__all__ = ["CopilotACPClient"]
diff --git a/agent/model_metadata.py b/agent/model_metadata.py
index 6ea1603565..6522a136df 100644
--- a/agent/model_metadata.py
+++ b/agent/model_metadata.py
@@ -313,6 +313,17 @@ _URL_TO_PROVIDER: Dict[str, str] = {
"ollama.com": "ollama-cloud",
}
+# Auto-extend with hostnames derived from provider profiles.
+# Any provider with a base_url not already in the map gets added automatically.
+try:
+ from providers import list_providers as _list_providers
+ for _pp in _list_providers():
+ _host = _pp.get_hostname()
+ if _host and _host not in _URL_TO_PROVIDER:
+ _URL_TO_PROVIDER[_host] = _pp.name
+except Exception:
+ pass
+
def _infer_provider_from_url(base_url: str) -> Optional[str]:
"""Infer the models.dev provider name from a base URL.
diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py
index d1c8251ed2..b606da7fec 100644
--- a/agent/transports/__init__.py
+++ b/agent/transports/__init__.py
@@ -6,9 +6,16 @@ Usage:
result = transport.normalize_response(raw_response)
"""
-from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401
+from agent.transports.types import (
+ NormalizedResponse,
+ ToolCall,
+ Usage,
+ build_tool_call,
+ map_finish_reason,
+) # noqa: F401
_REGISTRY: dict = {}
+_discovered: bool = False
def register_transport(api_mode: str, transport_cls: type) -> None:
@@ -23,6 +30,9 @@ def get_transport(api_mode: str):
This allows gradual migration — call sites can check for None
and fall back to the legacy code path.
"""
+ global _discovered
+ if not _discovered:
+ _discover_transports()
cls = _REGISTRY.get(api_mode)
if cls is None:
# The registry can be partially populated when a specific transport
@@ -38,6 +48,8 @@ def get_transport(api_mode: str):
def _discover_transports() -> None:
"""Import all transport modules to trigger auto-registration."""
+ global _discovered
+ _discovered = True
try:
import agent.transports.anthropic # noqa: F401
except ImportError:
diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py
index 34d5caa88a..c50557c061 100644
--- a/agent/transports/chat_completions.py
+++ b/agent/transports/chat_completions.py
@@ -10,7 +10,7 @@ reasoning configuration, temperature handling, and extra_body assembly.
"""
import copy
-from typing import Any, Dict, List, Optional
+from typing import Any
from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools
from agent.prompt_builder import DEVELOPER_ROLE_MODELS
@@ -28,7 +28,9 @@ class ChatCompletionsTransport(ProviderTransport):
def api_mode(self) -> str:
return "chat_completions"
- def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]:
+ def convert_messages(
+ self, messages: list[dict[str, Any]], **kwargs
+ ) -> list[dict[str, Any]]:
"""Messages are already in OpenAI format — sanitize Codex leaks only.
Strips Codex Responses API fields (``codex_reasoning_items`` /
@@ -45,7 +47,9 @@ class ChatCompletionsTransport(ProviderTransport):
tool_calls = msg.get("tool_calls")
if isinstance(tool_calls, list):
for tc in tool_calls:
- if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc):
+ if isinstance(tc, dict) and (
+ "call_id" in tc or "response_item_id" in tc
+ ):
needs_sanitize = True
break
if needs_sanitize:
@@ -68,76 +72,52 @@ class ChatCompletionsTransport(ProviderTransport):
tc.pop("response_item_id", None)
return sanitized
- def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+ def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]:
"""Tools are already in OpenAI format — identity."""
return tools
def build_kwargs(
self,
model: str,
- messages: List[Dict[str, Any]],
- tools: Optional[List[Dict[str, Any]]] = None,
+ messages: list[dict[str, Any]],
+ tools: list[dict[str, Any]] | None = None,
**params,
- ) -> Dict[str, Any]:
+ ) -> dict[str, Any]:
"""Build chat.completions.create() kwargs.
- This is the most complex transport method — it handles ~16 providers
- via params rather than subclasses.
-
- params:
+ params (all optional):
timeout: float — API call timeout
max_tokens: int | None — user-configured max tokens
- ephemeral_max_output_tokens: int | None — one-shot override (error recovery)
+ ephemeral_max_output_tokens: int | None — one-shot override
max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N}
reasoning_config: dict | None
request_overrides: dict | None
session_id: str | None
- qwen_session_metadata: dict | None — {sessionId, promptId} precomputed
model_lower: str — lowercase model name for pattern matching
- # Provider detection flags (all optional, default False)
- is_openrouter: bool
- is_nous: bool
- is_qwen_portal: bool
- is_github_models: bool
- is_nvidia_nim: bool
- is_kimi: bool
- is_custom_provider: bool
- ollama_num_ctx: int | None
- # Provider routing
- provider_preferences: dict | None
- # Qwen-specific
- qwen_prepare_fn: callable | None — runs AFTER codex sanitization
- qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists
- # Temperature
- fixed_temperature: Any — from _fixed_temperature_for_model()
- omit_temperature: bool
- # Reasoning
+ # Provider profile path (all per-provider quirks live in providers/)
+ provider_profile: ProviderProfile | None — when present, delegates to
+ _build_kwargs_from_profile(); all flag params below are bypassed.
+ # Remaining flags — only used by the legacy fallback for unregistered
+ # providers (i.e. get_provider_profile() returned None). Known
+ # providers all go through provider_profile.
+ qwen_session_metadata: dict | None
supports_reasoning: bool
- github_reasoning_extra: dict | None
- # Claude on OpenRouter/Nous max output
anthropic_max_output: int | None
- # Extra
- extra_body_additions: dict | None — pre-built extra_body entries
+ extra_body_additions: dict | None
"""
# Codex sanitization: drop reasoning_items / call_id / response_item_id
sanitized = self.convert_messages(messages)
- # Qwen portal prep AFTER codex sanitization. If sanitize already
- # deepcopied, reuse that copy via the in-place variant to avoid a
- # second deepcopy.
- is_qwen = params.get("is_qwen_portal", False)
- if is_qwen:
- qwen_prep = params.get("qwen_prepare_fn")
- qwen_prep_inplace = params.get("qwen_prepare_inplace_fn")
- if sanitized is messages:
- if qwen_prep is not None:
- sanitized = qwen_prep(sanitized)
- else:
- # Already deepcopied — transform in place
- if qwen_prep_inplace is not None:
- qwen_prep_inplace(sanitized)
- elif qwen_prep is not None:
- sanitized = qwen_prep(sanitized)
+ # ── Provider profile: single-path when present ──────────────────
+ _profile = params.get("provider_profile")
+ if _profile:
+ return self._build_kwargs_from_profile(
+ _profile, model, sanitized, tools, params
+ )
+
+ # ── Legacy fallback (unregistered / unknown provider) ───────────
+ # Reached only when get_provider_profile() returned None.
+ # Known providers always go through the profile path above.
# Developer role swap for GPT-5/Codex models
model_lower = params.get("model_lower", (model or "").lower())
@@ -150,7 +130,7 @@ class ChatCompletionsTransport(ProviderTransport):
sanitized = list(sanitized)
sanitized[0] = {**sanitized[0], "role": "developer"}
- api_kwargs: Dict[str, Any] = {
+ api_kwargs: dict[str, Any] = {
"model": model,
"messages": sanitized,
}
@@ -159,19 +139,6 @@ class ChatCompletionsTransport(ProviderTransport):
if timeout is not None:
api_kwargs["timeout"] = timeout
- # Temperature
- fixed_temp = params.get("fixed_temperature")
- omit_temp = params.get("omit_temperature", False)
- if omit_temp:
- api_kwargs.pop("temperature", None)
- elif fixed_temp is not None:
- api_kwargs["temperature"] = fixed_temp
-
- # Qwen metadata (caller precomputes {sessionId, promptId})
- qwen_meta = params.get("qwen_session_metadata")
- if qwen_meta and is_qwen:
- api_kwargs["metadata"] = qwen_meta
-
# Tools
if tools:
# Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting
@@ -186,96 +153,24 @@ class ChatCompletionsTransport(ProviderTransport):
ephemeral = params.get("ephemeral_max_output_tokens")
max_tokens = params.get("max_tokens")
anthropic_max_out = params.get("anthropic_max_output")
- is_nvidia_nim = params.get("is_nvidia_nim", False)
- is_kimi = params.get("is_kimi", False)
- reasoning_config = params.get("reasoning_config")
if ephemeral is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(ephemeral))
elif max_tokens is not None and max_tokens_fn:
api_kwargs.update(max_tokens_fn(max_tokens))
- elif is_nvidia_nim and max_tokens_fn:
- api_kwargs.update(max_tokens_fn(16384))
- elif is_qwen and max_tokens_fn:
- api_kwargs.update(max_tokens_fn(65536))
- elif is_kimi and max_tokens_fn:
- # Kimi/Moonshot: 32000 matches Kimi CLI's default
- api_kwargs.update(max_tokens_fn(32000))
elif anthropic_max_out is not None:
api_kwargs["max_tokens"] = anthropic_max_out
- # Kimi: top-level reasoning_effort (unless thinking disabled)
- if is_kimi:
- _kimi_thinking_off = bool(
- reasoning_config
- and isinstance(reasoning_config, dict)
- and reasoning_config.get("enabled") is False
- )
- if not _kimi_thinking_off:
- _kimi_effort = "medium"
- if reasoning_config and isinstance(reasoning_config, dict):
- _e = (reasoning_config.get("effort") or "").strip().lower()
- if _e in ("low", "medium", "high"):
- _kimi_effort = _e
- api_kwargs["reasoning_effort"] = _kimi_effort
-
# extra_body assembly
- extra_body: Dict[str, Any] = {}
+ extra_body: dict[str, Any] = {}
- is_openrouter = params.get("is_openrouter", False)
- is_nous = params.get("is_nous", False)
- is_github_models = params.get("is_github_models", False)
-
- provider_prefs = params.get("provider_preferences")
- if provider_prefs and is_openrouter:
- extra_body["provider"] = provider_prefs
-
- # Kimi extra_body.thinking
- if is_kimi:
- _kimi_thinking_enabled = True
- if reasoning_config and isinstance(reasoning_config, dict):
- if reasoning_config.get("enabled") is False:
- _kimi_thinking_enabled = False
- extra_body["thinking"] = {
- "type": "enabled" if _kimi_thinking_enabled else "disabled",
- }
-
- # Reasoning
+ # Generic reasoning passthrough for unknown providers
if params.get("supports_reasoning", False):
- if is_github_models:
- gh_reasoning = params.get("github_reasoning_extra")
- if gh_reasoning is not None:
- extra_body["reasoning"] = gh_reasoning
+ reasoning_config = params.get("reasoning_config")
+ if reasoning_config is not None:
+ extra_body["reasoning"] = dict(reasoning_config)
else:
- if reasoning_config is not None:
- rc = dict(reasoning_config)
- if is_nous and rc.get("enabled") is False:
- pass # omit for Nous when disabled
- else:
- extra_body["reasoning"] = rc
- else:
- extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
-
- if is_nous:
- extra_body["tags"] = ["product=hermes-agent"]
-
- # Ollama num_ctx
- ollama_ctx = params.get("ollama_num_ctx")
- if ollama_ctx:
- options = extra_body.get("options", {})
- options["num_ctx"] = ollama_ctx
- extra_body["options"] = options
-
- # Ollama/custom think=false
- if params.get("is_custom_provider", False):
- if reasoning_config and isinstance(reasoning_config, dict):
- _effort = (reasoning_config.get("effort") or "").strip().lower()
- _enabled = reasoning_config.get("enabled", True)
- if _effort == "none" or _enabled is False:
- extra_body["think"] = False
-
- if is_qwen:
- extra_body["vl_high_resolution_images"] = True
+ extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
# Merge any pre-built extra_body additions
additions = params.get("extra_body_additions")
@@ -292,6 +187,117 @@ class ChatCompletionsTransport(ProviderTransport):
return api_kwargs
+ def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params):
+ """Build API kwargs using a ProviderProfile — single path, no legacy flags.
+
+ This method replaces the entire flag-based kwargs assembly when a
+ provider_profile is passed. Every quirk comes from the profile object.
+ """
+ from providers.base import OMIT_TEMPERATURE
+
+ # Message preprocessing
+ sanitized = profile.prepare_messages(sanitized)
+
+ # Developer role swap — model-name-based, applies to all providers
+ _model_lower = (model or "").lower()
+ if (
+ sanitized
+ and isinstance(sanitized[0], dict)
+ and sanitized[0].get("role") == "system"
+ and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS)
+ ):
+ sanitized = list(sanitized)
+ sanitized[0] = {**sanitized[0], "role": "developer"}
+
+ api_kwargs: dict[str, Any] = {
+ "model": model,
+ "messages": sanitized,
+ }
+
+ # Temperature
+ if profile.fixed_temperature is OMIT_TEMPERATURE:
+ pass # Don't include temperature at all
+ elif profile.fixed_temperature is not None:
+ api_kwargs["temperature"] = profile.fixed_temperature
+ else:
+ # Use caller's temperature if provided
+ temp = params.get("temperature")
+ if temp is not None:
+ api_kwargs["temperature"] = temp
+
+ # Timeout
+ timeout = params.get("timeout")
+ if timeout is not None:
+ api_kwargs["timeout"] = timeout
+
+ # Tools — apply Moonshot/Kimi schema sanitization regardless of path
+ if tools:
+ if is_moonshot_model(model):
+ tools = sanitize_moonshot_tools(tools)
+ api_kwargs["tools"] = tools
+
+ # max_tokens resolution — priority: ephemeral > user > profile default
+ max_tokens_fn = params.get("max_tokens_param_fn")
+ ephemeral = params.get("ephemeral_max_output_tokens")
+ user_max = params.get("max_tokens")
+ anthropic_max = params.get("anthropic_max_output")
+
+ if ephemeral is not None and max_tokens_fn:
+ api_kwargs.update(max_tokens_fn(ephemeral))
+ elif user_max is not None and max_tokens_fn:
+ api_kwargs.update(max_tokens_fn(user_max))
+ elif profile.default_max_tokens and max_tokens_fn:
+ api_kwargs.update(max_tokens_fn(profile.default_max_tokens))
+ elif anthropic_max is not None:
+ api_kwargs["max_tokens"] = anthropic_max
+
+ # Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.)
+ reasoning_config = params.get("reasoning_config")
+ extra_body_from_profile, top_level_from_profile = (
+ profile.build_api_kwargs_extras(
+ reasoning_config=reasoning_config,
+ supports_reasoning=params.get("supports_reasoning", False),
+ qwen_session_metadata=params.get("qwen_session_metadata"),
+ model=model,
+ ollama_num_ctx=params.get("ollama_num_ctx"),
+ )
+ )
+ api_kwargs.update(top_level_from_profile)
+
+ # extra_body assembly
+ extra_body: dict[str, Any] = {}
+
+ # Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.)
+ profile_body = profile.build_extra_body(
+ session_id=params.get("session_id"),
+ provider_preferences=params.get("provider_preferences"),
+ )
+ if profile_body:
+ extra_body.update(profile_body)
+
+ # Profile's reasoning/thinking extra_body entries
+ if extra_body_from_profile:
+ extra_body.update(extra_body_from_profile)
+
+ # Merge any pre-built extra_body additions from the caller
+ additions = params.get("extra_body_additions")
+ if additions:
+ extra_body.update(additions)
+
+ # Request overrides (user config)
+ overrides = params.get("request_overrides")
+ if overrides:
+ for k, v in overrides.items():
+ if k == "extra_body" and isinstance(v, dict):
+ extra_body.update(v)
+ else:
+ api_kwargs[k] = v
+
+ if extra_body:
+ api_kwargs["extra_body"] = extra_body
+
+ return api_kwargs
+
def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse:
"""Normalize OpenAI ChatCompletion to NormalizedResponse.
@@ -313,7 +319,7 @@ class ChatCompletionsTransport(ProviderTransport):
# Gemini 3 thinking models attach extra_content with
# thought_signature — without replay on the next turn the API
# rejects the request with 400.
- tc_provider_data: Dict[str, Any] = {}
+ tc_provider_data: dict[str, Any] = {}
extra = getattr(tc, "extra_content", None)
if extra is None and hasattr(tc, "model_extra"):
extra = (tc.model_extra or {}).get("extra_content")
@@ -324,12 +330,14 @@ class ChatCompletionsTransport(ProviderTransport):
except Exception:
pass
tc_provider_data["extra_content"] = extra
- tool_calls.append(ToolCall(
- id=tc.id,
- name=tc.function.name,
- arguments=tc.function.arguments,
- provider_data=tc_provider_data or None,
- ))
+ tool_calls.append(
+ ToolCall(
+ id=tc.id,
+ name=tc.function.name,
+ arguments=tc.function.arguments,
+ provider_data=tc_provider_data or None,
+ )
+ )
usage = None
if hasattr(response, "usage") and response.usage:
@@ -347,7 +355,7 @@ class ChatCompletionsTransport(ProviderTransport):
reasoning = getattr(msg, "reasoning", None)
reasoning_content = getattr(msg, "reasoning_content", None)
- provider_data: Dict[str, Any] = {}
+ provider_data: dict[str, Any] = {}
if reasoning_content:
provider_data["reasoning_content"] = reasoning_content
rd = getattr(msg, "reasoning_details", None)
@@ -373,7 +381,7 @@ class ChatCompletionsTransport(ProviderTransport):
return False
return True
- def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]:
+ def extract_cache_stats(self, response: Any) -> dict[str, int] | None:
"""Extract OpenRouter/OpenAI cache stats from prompt_tokens_details."""
usage = getattr(response, "usage", None)
if usage is None:
diff --git a/agent/transports/types.py b/agent/transports/types.py
index 68a807b47c..f0da1eb6f8 100644
--- a/agent/transports/types.py
+++ b/agent/transports/types.py
@@ -12,7 +12,7 @@ from __future__ import annotations
import json
from dataclasses import dataclass, field
-from typing import Any, Dict, List, Optional
+from typing import Any
@dataclass
@@ -32,10 +32,10 @@ class ToolCall:
* Others: ``None``
"""
- id: Optional[str]
+ id: str | None
name: str
arguments: str # JSON string
- provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+ provider_data: dict[str, Any] | None = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The agent loop reads tc.function.name / tc.function.arguments
@@ -47,17 +47,17 @@ class ToolCall:
return "function"
@property
- def function(self) -> "ToolCall":
+ def function(self) -> ToolCall:
"""Return self so tc.function.name / tc.function.arguments work."""
return self
@property
- def call_id(self) -> Optional[str]:
+ def call_id(self) -> str | None:
"""Codex call_id from provider_data, accessed via getattr by _build_assistant_message."""
return (self.provider_data or {}).get("call_id")
@property
- def response_item_id(self) -> Optional[str]:
+ def response_item_id(self) -> str | None:
"""Codex response_item_id from provider_data."""
return (self.provider_data or {}).get("response_item_id")
@@ -101,18 +101,18 @@ class NormalizedResponse:
* Others: ``None``
"""
- content: Optional[str]
- tool_calls: Optional[List[ToolCall]]
+ content: str | None
+ tool_calls: list[ToolCall] | None
finish_reason: str # "stop", "tool_calls", "length", "content_filter"
- reasoning: Optional[str] = None
- usage: Optional[Usage] = None
- provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False)
+ reasoning: str | None = None
+ usage: Usage | None = None
+ provider_data: dict[str, Any] | None = field(default=None, repr=False)
# ── Backward compatibility ──────────────────────────────────
# The shim _nr_to_assistant_message() mapped these from provider_data.
# These properties let NormalizedResponse pass through directly.
@property
- def reasoning_content(self) -> Optional[str]:
+ def reasoning_content(self) -> str | None:
pd = self.provider_data or {}
return pd.get("reasoning_content")
@@ -136,8 +136,9 @@ class NormalizedResponse:
# Factory helpers
# ---------------------------------------------------------------------------
+
def build_tool_call(
- id: Optional[str],
+ id: str | None,
name: str,
arguments: Any,
**provider_fields: Any,
@@ -151,7 +152,7 @@ def build_tool_call(
return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd)
-def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str:
+def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str:
"""Translate a provider-specific stop reason to the normalised set.
Falls back to ``"stop"`` for unknown or ``None`` reasons.
diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py
index fb6a79d1ff..4586bffe73 100644
--- a/hermes_cli/auth.py
+++ b/hermes_cli/auth.py
@@ -374,6 +374,37 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = {
),
}
+# Auto-extend PROVIDER_REGISTRY with any api-key provider registered in
+# providers/ that is not already declared above. New providers only need a
+# providers/*.py file — no edits to this file required.
+try:
+ from providers import list_providers as _list_providers_for_registry
+ for _pp in _list_providers_for_registry():
+ if _pp.name in PROVIDER_REGISTRY:
+ continue
+ if _pp.auth_type != "api_key" or not _pp.env_vars:
+ continue
+ # Skip providers that need custom token resolution (copilot, kimi, zai)
+ # — those are already fully declared above.
+ if _pp.name in {"copilot", "kimi-coding", "kimi-coding-cn", "zai"}:
+ continue
+ _api_key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_BASE_URL") and not v.endswith("_URL"))
+ _base_url_var = next((v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), None)
+ PROVIDER_REGISTRY[_pp.name] = ProviderConfig(
+ id=_pp.name,
+ name=_pp.display_name or _pp.name,
+ auth_type="api_key",
+ inference_base_url=_pp.base_url,
+ api_key_env_vars=_api_key_vars or _pp.env_vars,
+ base_url_env_var=_base_url_var or "",
+ )
+ # Also register aliases so resolve_provider() resolves them
+ for _alias in _pp.aliases:
+ if _alias not in PROVIDER_REGISTRY:
+ PROVIDER_REGISTRY[_alias] = PROVIDER_REGISTRY[_pp.name]
+except Exception:
+ pass
+
# =============================================================================
# Anthropic Key Helper
@@ -1150,6 +1181,17 @@ def resolve_provider(
"vllm": "custom", "llamacpp": "custom",
"llama.cpp": "custom", "llama-cpp": "custom",
}
+ # Extend with aliases declared in providers/*.py that aren't already mapped.
+ # This keeps providers/ as the single source for new aliases while the
+ # hardcoded dict above remains authoritative for existing ones.
+ try:
+ from providers import list_providers as _lp
+ for _pp in _lp():
+ for _alias in _pp.aliases:
+ if _alias not in _PROVIDER_ALIASES:
+ _PROVIDER_ALIASES[_alias] = _pp.name
+ except Exception:
+ pass
normalized = _PROVIDER_ALIASES.get(normalized, normalized)
if normalized == "openrouter":
diff --git a/hermes_cli/config.py b/hermes_cli/config.py
index bb11a5dff5..d690bdc523 100644
--- a/hermes_cli/config.py
+++ b/hermes_cli/config.py
@@ -4252,3 +4252,45 @@ def config_command(args):
print(" hermes config path Show config file path")
print(" hermes config env-path Show .env file path")
sys.exit(1)
+
+
# ── Profile-driven env var injection ─────────────────────────────────────────
# Every provider registered in providers/ with auth_type="api_key" gets its
# env_vars surfaced in OPTIONAL_ENV_VARS automatically — no edits to this
# file required. Runs exactly once, at import time.

_profile_env_vars_injected = False


def _inject_profile_env_vars() -> None:
    """Populate OPTIONAL_ENV_VARS from provider profiles not already listed.

    Idempotent: the module-level guard makes repeated calls no-ops, and any
    failure (including the providers package being unimportable) is swallowed
    so config loading never breaks.
    """
    global _profile_env_vars_injected
    if _profile_env_vars_injected:
        return
    _profile_env_vars_injected = True
    try:
        from providers import list_providers

        for profile in list_providers():
            if profile.auth_type != "api_key":
                continue
            label = profile.display_name or profile.name
            for var in profile.env_vars:
                if var in OPTIONAL_ENV_VARS:
                    continue
                # Vars ending in _BASE_URL/_URL are endpoint overrides, not secrets.
                is_key = not (var.endswith("_BASE_URL") or var.endswith("_URL"))
                OPTIONAL_ENV_VARS[var] = {
                    "description": f"{label} {'API key' if is_key else 'base URL override'}",
                    "prompt": f"{label} {'API key' if is_key else 'base URL (leave empty for default)'}",
                    "url": profile.signup_url or None,
                    "password": is_key,
                    "category": "provider",
                    "advanced": True,
                }
    except Exception:
        pass


# Eagerly inject so OPTIONAL_ENV_VARS is fully populated at import time.
_inject_profile_env_vars()
diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py
index dc346ac9b2..b85c95d10c 100644
--- a/hermes_cli/doctor.py
+++ b/hermes_cli/doctor.py
@@ -164,6 +164,84 @@ def _check_gateway_service_linger(issues: list[str]) -> None:
check_warn("Could not verify systemd linger", f"({linger_detail})")
+_APIKEY_PROVIDERS_CACHE: list | None = None
+
+
+def _build_apikey_providers_list() -> list:
+ """Build the API-key provider health-check list once and cache it.
+
+ Tuple format: (name, env_vars, default_url, base_env, supports_models_endpoint)
+ Base list augmented with any ProviderProfile with auth_type="api_key" not
+ already present — adding providers/*.py is sufficient to get into doctor.
+ """
+ _static = [
+ ("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
+ ("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
+ ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
+ ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
+ ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
+ ("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
+ ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
+ ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
+ ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
+ ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
+ # MiniMax: the /anthropic endpoint doesn't support /models; use the /v1 surface.
+ ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
+ ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True),
+ ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
+ ("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
+ ("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
+ # OpenCode Go has no shared /models endpoint; skip the health check.
+ ("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
+ ]
+ _known_names = {t[0] for t in _static}
+ # Also index by profile canonical name so profiles without display_name
+ # don't create duplicate entries for providers already in the static list.
+ _known_canonical: set[str] = set()
+ _name_to_canonical = {
+ "Z.AI / GLM": "zai", "Kimi / Moonshot": "kimi-coding",
+ "StepFun Step Plan": "stepfun", "Kimi / Moonshot (China)": "kimi-coding-cn",
+ "Arcee AI": "arcee", "GMI Cloud": "gmi", "DeepSeek": "deepseek",
+ "Hugging Face": "huggingface", "NVIDIA NIM": "nvidia",
+ "Alibaba/DashScope": "alibaba", "MiniMax": "minimax",
+ "MiniMax (China)": "minimax-cn", "Vercel AI Gateway": "ai-gateway",
+ "Kilo Code": "kilocode", "OpenCode Zen": "opencode-zen",
+ "OpenCode Go": "opencode-go",
+ }
+ for _label, _canonical in _name_to_canonical.items():
+ _known_canonical.add(_canonical)
+ try:
+ from providers import list_providers
+ from providers.base import ProviderProfile as _PP
+ for _pp in list_providers():
+ if not isinstance(_pp, _PP) or _pp.auth_type != "api_key" or not _pp.env_vars:
+ continue
+ _label = _pp.display_name or _pp.name
+ if _label in _known_names or _pp.name in _known_canonical:
+ continue
+ # Separate API-key vars from base-URL override vars — the health-check
+ # loop sends the first found value as Authorization: Bearer, so a URL
+ # string must never be picked.
+ _key_vars = tuple(
+ v for v in _pp.env_vars
+ if not v.endswith("_BASE_URL") and not v.endswith("_URL")
+ )
+ _base_var = next(
+ (v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")),
+ None,
+ )
+ if not _key_vars:
+ continue
+ _models_url = (
+ (_pp.models_url or (_pp.base_url.rstrip("/") + "/models"))
+ if _pp.base_url else None
+ )
+ _static.append((_label, _key_vars, _models_url, _base_var, True))
+ except Exception:
+ pass
+ return _static
+
+
def run_doctor(args):
"""Run diagnostic checks."""
should_fix = getattr(args, 'fix', False)
@@ -931,27 +1009,11 @@ def run_doctor(args):
# -- API-key providers --
# Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint)
- # If supports_models_endpoint is False, we skip the health check and just show "configured"
- _apikey_providers = [
- ("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True),
- ("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True),
- ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True),
- ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True),
- ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True),
- ("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True),
- ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True),
- ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True),
- ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True),
- ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True),
- # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does.
- ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True),
- ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True),
- ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True),
- ("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True),
- ("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True),
- # OpenCode Go has no shared /models endpoint; skip the health check.
- ("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False),
- ]
+ # Cached at module level after first build — profiles auto-extend it.
+ global _APIKEY_PROVIDERS_CACHE
+ if _APIKEY_PROVIDERS_CACHE is None:
+ _APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list()
+ _apikey_providers = _APIKEY_PROVIDERS_CACHE
for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers:
_key = ""
for _ev in _env_vars:
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 1a01e67c46..a33f3e5463 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1528,6 +1528,21 @@ def cmd_model(args):
select_provider_and_model(args=args)
def _is_profile_api_key_provider(provider_id: str) -> bool:
    """Tell whether *provider_id* resolves to a profile with auth_type='api_key'.

    Acts as the catch-all branch in select_provider_and_model(): providers
    declared in providers/*.py dispatch to _model_flow_api_key_provider
    automatically, with no explicit elif required here. Any lookup failure
    is treated as "not an api-key provider".
    """
    try:
        from providers import get_provider_profile

        profile = get_provider_profile(provider_id)
        return profile is not None and profile.auth_type == "api_key"
    except Exception:
        return False
+
def select_provider_and_model(args=None):
"""Core provider selection + model picking logic.
@@ -1820,7 +1835,7 @@ def select_provider_and_model(args=None):
"gmi",
"nvidia",
"ollama-cloud",
- ):
+ ) or _is_profile_api_key_provider(selected_provider):
_model_flow_api_key_provider(config, selected_provider, current_model)
# ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@@ -7618,6 +7633,22 @@ def cmd_logs(args):
)
def _build_provider_choices() -> list[str]:
    """Build the --provider choices list from CANONICAL_PROVIDERS + 'auto'.

    Falls back to a static list so the CLI keeps working even when
    hermes_cli.models cannot be imported. The fallback must stay a superset
    of the previously hardcoded choices — in particular "gmi", which the old
    static argparse list accepted and must not be silently dropped.
    """
    try:
        from hermes_cli.models import CANONICAL_PROVIDERS as _cp
        return ["auto"] + [p.slug for p in _cp]
    except Exception:
        # Fallback: static list guarantees the CLI always works
        return [
            "auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot",
            "anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry",
            "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn",
            "stepfun", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee",
            "gmi", "nvidia", "deepseek", "alibaba", "qwen-oauth", "opencode-zen",
            "opencode-go",
        ]
+
+
def main():
"""Main entry point for hermes CLI."""
parser = argparse.ArgumentParser(
@@ -7811,30 +7842,7 @@ For more help on a command:
)
chat_parser.add_argument(
"--provider",
- choices=[
- "auto",
- "openrouter",
- "nous",
- "openai-codex",
- "copilot-acp",
- "copilot",
- "anthropic",
- "gemini",
- "xai",
- "ollama-cloud",
- "huggingface",
- "zai",
- "kimi-coding",
- "kimi-coding-cn",
- "stepfun",
- "minimax",
- "minimax-cn",
- "kilocode",
- "xiaomi",
- "arcee",
- "gmi",
- "nvidia",
- ],
+ choices=_build_provider_choices(),
default=None,
help="Inference provider (default: auto)",
)
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 28ca6d7dea..96d67b2a24 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -750,6 +750,25 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
]
# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
# that the hand-maintained list above does not already contain. Dropping a new
# module into providers/ is then enough to surface it in the model picker,
# /model, and every downstream consumer — no edits to this file needed.
_canonical_slugs = {entry.slug for entry in CANONICAL_PROVIDERS}
try:
    from providers import list_providers as _list_providers_for_canonical

    for _profile in _list_providers_for_canonical():
        if _profile.name in _canonical_slugs:
            continue
        # Non-api-key auth flows need bespoke picker UX — don't auto-inject.
        if _profile.auth_type in ("oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"):
            continue
        _title = _profile.display_name or _profile.name
        _blurb = _profile.description or f"{_title} (direct API)"
        CANONICAL_PROVIDERS.append(ProviderEntry(_profile.name, _title, _blurb))
        _canonical_slugs.add(_profile.name)
except Exception:
    pass
+
# Derived dicts — used throughout the codebase
_PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
_PROVIDER_LABELS["custom"] = "Custom endpoint" # special case: not a named provider
@@ -1884,6 +1903,34 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
live = fetch_api_models(api_key, base_url)
if live:
return live
+
+ # ── Profile-based generic live fetch (all simple api-key providers) ──
+ # Handles any provider registered in providers/ with auth_type="api_key".
+ # Replaces per-provider copy-paste blocks (stepfun, gmi, zai, etc.).
+ try:
+ from providers import get_provider_profile
+ from hermes_cli.auth import resolve_api_key_provider_credentials
+
+ _p = get_provider_profile(normalized)
+ if _p and _p.auth_type == "api_key" and _p.base_url:
+ try:
+ creds = resolve_api_key_provider_credentials(normalized)
+ api_key = str(creds.get("api_key") or "").strip()
+ base_url = str(creds.get("base_url") or "").strip()
+ except Exception:
+ api_key, base_url = "", _p.base_url
+ if not base_url:
+ base_url = _p.base_url
+ if api_key:
+ live = _p.fetch_models(api_key=api_key)
+ if live:
+ return live
+ # Use profile's fallback_models if defined
+ if _p.fallback_models:
+ return list(_p.fallback_models)
+ except Exception:
+ pass
+
curated_static = list(_PROVIDER_MODELS.get(normalized, []))
if normalized in _MODELS_DEV_PREFERRED:
return _merge_with_models_dev(normalized, curated_static)
diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py
index 1fe5acc2b6..54538c22e8 100644
--- a/hermes_cli/runtime_provider.py
+++ b/hermes_cli/runtime_provider.py
@@ -214,10 +214,6 @@ def _resolve_runtime_from_pool_entry(
base_url = cfg_base_url or base_url or "https://api.anthropic.com"
elif provider == "openrouter":
base_url = base_url or OPENROUTER_BASE_URL
- elif provider == "xai":
- api_mode = "codex_responses"
- elif provider == "nous":
- api_mode = "chat_completions"
elif provider == "copilot":
api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
@@ -249,6 +245,14 @@ def _resolve_runtime_from_pool_entry(
base_url = re.sub(r"/v1/?$", "", base_url)
else:
configured_provider = str(model_cfg.get("provider") or "").strip().lower()
+ # Use profile api_mode for all other known providers
+ try:
+ from providers import get_provider_profile
+ _p = get_provider_profile(provider)
+ if _p and _p.api_mode:
+ api_mode = _p.api_mode
+ except Exception:
+ pass
# Honour model.base_url from config.yaml when the configured provider
# matches this provider — same pattern as the Anthropic branch above.
# Only override when the pool entry has no explicit base_url (i.e. it
@@ -266,12 +270,21 @@ def _resolve_runtime_from_pool_entry(
from hermes_cli.models import opencode_model_api_mode
api_mode = opencode_model_api_mode(provider, effective_model)
else:
- # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
- # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
- # codex_responses).
- detected = _detect_api_mode_for_url(base_url)
- if detected:
- api_mode = detected
+ # Try profile api_mode first, then auto-detect from URL
+ try:
+ from providers import get_provider_profile
+ _p = get_provider_profile(provider)
+ if _p and _p.api_mode:
+ api_mode = _p.api_mode
+ except Exception:
+ pass
+ if api_mode == "chat_completions":
+ # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
+ # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
+ # codex_responses).
+ detected = _detect_api_mode_for_url(base_url)
+ if detected:
+ api_mode = detected
# OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
# Anthropic SDK prepends its own /v1/messages to the base_url. Strip the
diff --git a/providers/README.md b/providers/README.md
new file mode 100644
index 0000000000..786bc3c2e9
--- /dev/null
+++ b/providers/README.md
@@ -0,0 +1,307 @@
+# providers/
+
+Single source of truth for every inference provider Hermes knows about.
+
+Each provider is declared once here as a `ProviderProfile`. Every other layer —
+auth resolution, transport kwargs, model listing, runtime routing — reads from
+these profiles instead of maintaining its own parallel data.
+
+---
+
+## Directory layout
+
+```
+providers/
+├── base.py ProviderProfile dataclass + OMIT_TEMPERATURE sentinel
+├── __init__.py Registry: register_provider(), get_provider_profile()
+├── README.md This file
+│
+├── # Simple providers — just identity + auth + endpoint
+├── alibaba.py Alibaba Cloud DashScope
+├── arcee.py Arcee AI
+├── bedrock.py AWS Bedrock (api_mode=bedrock_converse)
+├── deepseek.py DeepSeek
+├── huggingface.py Hugging Face Inference API
+├── kilocode.py Kilo Code
+├── minimax.py MiniMax (international + CN)
+├── nvidia.py NVIDIA NIM (default_max_tokens=16384)
+├── ollama_cloud.py Ollama Cloud
+├── stepfun.py StepFun
+├── xiaomi.py Xiaomi MiMo
+├── xai.py xAI Grok (api_mode=codex_responses)
+├── zai.py Z.AI / GLM
+│
+├── # Medium — one or two quirks
+├── anthropic.py Native Anthropic (x-api-key header, api_mode=anthropic_messages)
+├── copilot.py GitHub Copilot (auth_type=copilot, reasoning per model)
+├── copilot_acp.py Copilot ACP subprocess (api_mode=copilot_acp)
+├── custom.py Custom/Ollama local (think=false, num_ctx)
+├── gemini.py Google Gemini AI Studio + Cloud Code OAuth
+├── kimi.py Kimi Coding (OMIT_TEMPERATURE, thinking, dual endpoint)
+├── openai_codex.py OpenAI Codex OAuth (api_mode=codex_responses)
+├── opencode.py OpenCode Zen + Go (per-model api_mode routing)
+│
+├── # Complex — subclasses with multiple overrides
+├── nous.py Nous Portal (tags, attribution, reasoning omit-when-disabled)
+├── openrouter.py OpenRouter (provider preferences, public model fetch)
+├── qwen.py Qwen OAuth (message normalization, cache_control, vl_hires)
+└── vercel.py Vercel AI Gateway (attribution headers, reasoning passthrough)
+```
+
+---
+
+## ProviderProfile fields
+
+```python
+@dataclass
+class ProviderProfile:
+ # Identity
+ name: str # canonical ID — auto-registered as PROVIDER_REGISTRY key for new api-key providers
+ api_mode: str # "chat_completions" | "anthropic_messages" |
+ # "codex_responses" | "bedrock_converse" | "copilot_acp"
+ aliases: tuple # alternate names resolved by get_provider_profile()
+
+ # Auth & endpoints
+ env_vars: tuple # env var names holding the API key, in priority order
+ base_url: str # default inference endpoint
+ models_url: str # explicit models endpoint; falls back to {base_url}/models
+ # set when the models catalog lives at a different URL
+ # (e.g. OpenRouter: public /api/v1/models vs /api/v1 inference)
+ auth_type: str # "api_key" | "oauth_device_code" | "oauth_external" |
+ # "copilot" | "aws" | "external_process"
+
+ # Client-level quirks
+ default_headers: dict # extra HTTP headers sent on every request
+
+ # Request-level quirks
+ fixed_temperature: Any # None = use caller's default; OMIT_TEMPERATURE = don't send
+ default_max_tokens: int|None # inject max_tokens when caller omits it
+ default_aux_model: str # cheap model for auxiliary tasks (compression, vision, etc.)
+ # empty string = use main model (default)
+```
+
+---
+
+## Hooks (override in a subclass)
+
+| Method | When to override |
+|--------|-----------------|
+| `prepare_messages(messages)` | Provider needs message pre-processing (Qwen: string → list-of-parts, cache_control) |
+| `build_extra_body(*, session_id, **ctx)` | Provider-specific `extra_body` fields (Nous: tags, OpenRouter: provider preferences) |
+| `build_api_kwargs_extras(*, reasoning_config, **ctx)` | Returns `(extra_body_additions, top_level_kwargs)` — use when some fields go to `extra_body` and some go top-level (Kimi: `reasoning_effort` top-level; OpenRouter: `reasoning` in extra_body) |
+| `fetch_models(*, api_key, timeout)` | Custom model listing (Anthropic: x-api-key header; OpenRouter: public endpoint, no auth; Bedrock/copilot-acp: return None) |
+
+All hooks have safe defaults — only override what differs from the base.
+
+---
+
+## How to add a new provider
+
+### 1. Simple (standard OpenAI-compatible endpoint)
+
+```python
+# providers/myprovider.py
+from providers import register_provider
+from providers.base import ProviderProfile
+
+myprovider = ProviderProfile(
+ name="myprovider", # must match id in hermes_cli/auth.py PROVIDER_REGISTRY
+ aliases=("my-provider", "myp"),
+ api_mode="chat_completions",
+ env_vars=("MYPROVIDER_API_KEY",),
+ base_url="https://api.myprovider.com/v1",
+ auth_type="api_key",
+)
+
+register_provider(myprovider)
+```
+
+The default `fetch_models()` will call `GET https://api.myprovider.com/v1/models`
+with Bearer auth automatically. No override needed for standard `/v1/models`.
+
+### 2. With quirks (subclass)
+
+```python
+# providers/myprovider.py
+from typing import Any
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class MyProviderProfile(ProviderProfile):
+ """My provider — custom reasoning header."""
+
+ def build_api_kwargs_extras(
+ self,
+ *,
+ reasoning_config: dict | None = None,
+ **ctx: Any,
+ ) -> tuple[dict[str, Any], dict[str, Any]]:
+ extra_body: dict[str, Any] = {}
+ if reasoning_config:
+ extra_body["my_reasoning"] = reasoning_config.get("effort", "medium")
+ return extra_body, {}
+
+ def fetch_models(
+ self,
+ *,
+ api_key: str | None = None,
+ timeout: float = 8.0,
+ ) -> list[str] | None:
+ # Override only if your endpoint differs from standard /v1/models
+ return super().fetch_models(api_key=api_key, timeout=timeout)
+
+
+myprovider = MyProviderProfile(
+ name="myprovider",
+ aliases=("myp",),
+ env_vars=("MYPROVIDER_API_KEY",),
+ base_url="https://api.myprovider.com/v1",
+)
+
+register_provider(myprovider)
+```
+
+### 3. Wire it up
+
+After creating the file, add `name` to the `_PROFILE_ACTIVE_PROVIDERS` set in
+`run_agent.py` once you've verified parity against the legacy flag path. Start
+with a simple provider (no message prep, no reasoning quirks) and work up.
+
+---
+
+## fetch_models contract
+
+```python
+def fetch_models(
+ self,
+ *,
+ api_key: str | None = None,
+ timeout: float = 8.0,
+) -> list[str] | None:
+ ...
+```
+
+- Returns `list[str]`: model IDs from the provider's live endpoint.
+- Returns `None`: provider doesn't support REST model listing (Bedrock, copilot-acp),
+ or the request failed. Callers **must** fall back to `_PROVIDER_MODELS` on `None`.
+- Never raises — swallow exceptions and return `None`.
+- Default implementation: `GET {base_url}/models` with Bearer auth. Works for any
+ standard OpenAI-compatible provider.
+
+**Override when:**
+- Auth header is not `Bearer` (Anthropic: `x-api-key`)
+- Endpoint path differs from `/models` AND you can't just set `models_url` (OpenRouter: public endpoint, pass `api_key=None` explicitly)
+- Response format differs (extra wrapping, non-standard `id` field)
+- Provider has no REST endpoint (Bedrock, copilot-acp → return `None`)
+- Filtering needed post-fetch (only tool-capable models, etc.)
+
+Use `models_url` instead of overriding when the only difference is the URL:
+
+```python
+# No subclass needed — just set models_url
+myprovider = ProviderProfile(
+ name="myprovider",
+ base_url="https://api.myprovider.com/v1",
+ models_url="https://catalog.myprovider.com/models", # different host
+)
+```
+
+---
+
+## Debugging
+
+### Check if a provider resolves
+
+```python
+from providers import get_provider_profile
+
+p = get_provider_profile("myprovider")
+print(p) # ProviderProfile(name='myprovider', ...)
+print(p.base_url)
+print(p.api_mode)
+```
+
+### Check all registered providers
+
+```python
+from providers import _REGISTRY
+print(list(_REGISTRY.keys()))
+```
+
+### Test live model fetch
+
+```python
+import os
+from providers import get_provider_profile
+
+p = get_provider_profile("myprovider")
+key = os.getenv("MYPROVIDER_API_KEY")
+models = p.fetch_models(api_key=key, timeout=5.0)
+print(models) # list of model IDs, or None on failure
+```
+
+### Test alias resolution
+
+```python
+from providers import get_provider_profile
+
+# All of these should return the same profile
+assert get_provider_profile("openrouter").name == "openrouter"
+assert get_provider_profile("or").name == "openrouter"
+```
+
+### Run the provider test suite
+
+```bash
+# From the repo root
+source venv/bin/activate
+python -m pytest tests/providers/ -v
+```
+
+### Check ruff + ty compliance
+
+```bash
+source venv/bin/activate
+ruff format providers/*.py
+ruff check providers/*.py --select UP,E,F,I,W
+ty check providers/*.py
+```
+
+---
+
+## Common mistakes
+
+**Wrong `name`** — must be the same string that appears as the key in
+`hermes_cli/auth.py` `PROVIDER_REGISTRY`. New api-key providers auto-register
+into `PROVIDER_REGISTRY` from the profile, so the name IS the key. For providers
+with a pre-existing `PROVIDER_REGISTRY` entry, use the exact `id` field value.
+
+**Wrong `env_vars`** — separate API-key vars from base-URL override vars in the
+tuple. Env vars that end with `_BASE_URL` or `_URL` are treated as URL overrides;
+everything else is treated as an API key. Getting this wrong causes the doctor
+health check to send a URL string as a Bearer token.
+
+**Wrong `base_url`** — several providers have non-obvious paths:
+`stepfun: /step_plan/v1`, `opencode-go: /zen/go/v1`. The profile's `base_url`
+is also used as the `inference_base_url` when auto-registering into `PROVIDER_REGISTRY`
+for new providers, so it must be correct for auth resolution to work.
+
+**Skipping `api_mode`** — defaults to `chat_completions`. Providers that use
+`anthropic_messages`, `codex_responses`, `bedrock_converse`, or `copilot_acp`
+must set it explicitly.
+
+**Forgetting `register_provider()`** — auto-discovery runs `pkgutil.iter_modules`
+over the package and imports each module, but only if `register_provider()` is
+called at module level. Without it the profile is never in `_REGISTRY`.
+
+**`fetch_models` returning the wrong shape** — must return `list[str]` (plain
+model IDs), not `list[tuple]` or `list[dict]`. Callers expect plain strings.
+
+**Wrong `build_api_kwargs_extras` return shape** — must return a 2-tuple
+`(extra_body_dict, top_level_dict)`. Returning a single dict causes a
+`ValueError: not enough values to unpack` in the transport.
+
+**Swapped `build_api_kwargs_extras` order** — the tuple is `(extra_body_dict,
+top_level_dict)`, in that order. Swapping the two dicts raises no error but
+silently sends fields to the wrong place in the request.
diff --git a/providers/__init__.py b/providers/__init__.py
new file mode 100644
index 0000000000..9c80b449a9
--- /dev/null
+++ b/providers/__init__.py
@@ -0,0 +1,76 @@
+"""Provider module registry.
+
+Auto-discovers ProviderProfile instances from providers/*.py modules.
+Each module should define a module-level PROVIDER or PROVIDERS list.
+
+Usage:
+ from providers import get_provider_profile
+ profile = get_provider_profile("nvidia") # returns ProviderProfile or None
+ profile = get_provider_profile("kimi") # checks name + aliases
+"""
+
+from __future__ import annotations
+
+from providers.base import OMIT_TEMPERATURE, ProviderProfile # noqa: F401
+
+_REGISTRY: dict[str, ProviderProfile] = {}
+_ALIASES: dict[str, str] = {}
+_discovered = False
+
+
def register_provider(profile: ProviderProfile) -> None:
    """Record *profile* in the registry and point every alias at it.

    Profiles are stored by canonical name; aliases become name→canonical
    redirects. Re-registering the same name or alias overwrites the old entry.
    """
    canonical = profile.name
    _REGISTRY[canonical] = profile
    _ALIASES.update({alias: canonical for alias in profile.aliases})
+
+
def get_provider_profile(name: str) -> ProviderProfile | None:
    """Resolve *name* (canonical or alias) to its registered profile.

    Returns None for unknown providers so callers can fall back to the
    generic code path. Triggers module auto-discovery on first use.
    """
    if not _discovered:
        _discover_providers()
    return _REGISTRY.get(_ALIASES.get(name, name))
+
+
def list_providers() -> list[ProviderProfile]:
    """Return every registered profile exactly once, in registration order.

    The registry may map several canonical names to the same profile object,
    so entries are de-duplicated by object identity.
    """
    if not _discovered:
        _discover_providers()
    unique: dict[int, ProviderProfile] = {}
    for profile in _REGISTRY.values():
        unique.setdefault(id(profile), profile)
    return list(unique.values())
+
+
def _discover_providers() -> None:
    """Import every providers/*.py module so each registers its profile.

    Sets _discovered before importing so a failing import can never cause the
    scan to re-run. Each module is imported in isolation: one broken provider
    must not prevent the remaining modules from registering — hence the broad
    `except Exception` (a module-level NameError/ValueError/SyntaxError would
    otherwise abort discovery mid-loop with the registry permanently
    half-filled, since _discovered is already True).
    """
    global _discovered
    if _discovered:
        return
    _discovered = True

    import importlib
    import logging
    import pkgutil

    import providers as _pkg

    log = logging.getLogger(__name__)
    for _importer, modname, _ispkg in pkgutil.iter_modules(_pkg.__path__):
        if modname.startswith("_") or modname == "base":
            continue
        try:
            importlib.import_module(f"providers.{modname}")
        except Exception as e:  # isolate broken provider modules, not just ImportError
            log.warning("Failed to import provider module %s: %s", modname, e)
diff --git a/providers/alibaba.py b/providers/alibaba.py
new file mode 100644
index 0000000000..5772bc87e6
--- /dev/null
+++ b/providers/alibaba.py
@@ -0,0 +1,13 @@
+"""Alibaba Cloud DashScope provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+alibaba = ProviderProfile(
+ name="alibaba",
+ aliases=("dashscope", "alibaba-cloud", "qwen-dashscope"),
+ env_vars=("DASHSCOPE_API_KEY",),
+ base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
+)
+
+register_provider(alibaba)
diff --git a/providers/anthropic.py b/providers/anthropic.py
new file mode 100644
index 0000000000..f1f45eb82c
--- /dev/null
+++ b/providers/anthropic.py
@@ -0,0 +1,52 @@
+"""Native Anthropic provider profile."""
+
+import json
+import logging
+import urllib.request
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+logger = logging.getLogger(__name__)
+
+
+class AnthropicProfile(ProviderProfile):
+ """Native Anthropic — uses x-api-key header, not Bearer."""
+
+ def fetch_models(
+ self,
+ *,
+ api_key: str | None = None,
+ timeout: float = 8.0,
+ ) -> list[str] | None:
+ """Anthropic uses x-api-key header and anthropic-version."""
+ if not api_key:
+ return None
+ try:
+ req = urllib.request.Request("https://api.anthropic.com/v1/models")
+ req.add_header("x-api-key", api_key)
+ req.add_header("anthropic-version", "2023-06-01")
+ req.add_header("Accept", "application/json")
+ with urllib.request.urlopen(req, timeout=timeout) as resp:
+ data = json.loads(resp.read().decode())
+ return [
+ m["id"]
+ for m in data.get("data", [])
+ if isinstance(m, dict) and "id" in m
+ ]
+ except Exception as exc:
+ logger.debug("fetch_models(anthropic): %s", exc)
+ return None
+
+
+anthropic = AnthropicProfile(
+ name="anthropic",
+ aliases=("claude", "claude-oauth", "claude-code"),
+ api_mode="anthropic_messages",
+ env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"),
+ base_url="https://api.anthropic.com",
+ auth_type="api_key",
+ default_aux_model="claude-haiku-4-5-20251001",
+)
+
+register_provider(anthropic)
diff --git a/providers/arcee.py b/providers/arcee.py
new file mode 100644
index 0000000000..46afb6e16e
--- /dev/null
+++ b/providers/arcee.py
@@ -0,0 +1,13 @@
+"""Arcee AI provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+arcee = ProviderProfile(
+ name="arcee",
+ aliases=("arcee-ai", "arceeai"),
+ env_vars=("ARCEEAI_API_KEY",),
+ base_url="https://api.arcee.ai/api/v1",
+)
+
+register_provider(arcee)
diff --git a/providers/base.py b/providers/base.py
new file mode 100644
index 0000000000..2c685f9b81
--- /dev/null
+++ b/providers/base.py
@@ -0,0 +1,165 @@
+"""Provider profile base class.
+
+A ProviderProfile declares everything about an inference provider in one place:
+auth, endpoints, client quirks, request-time quirks. The transport reads this
+instead of receiving 20+ boolean flags.
+
+Provider profiles are DECLARATIVE — they describe the provider's behavior.
+They do NOT own client construction, credential rotation, or streaming.
+Those stay on AIAgent.
+"""
+
+from __future__ import annotations
+
+import logging
+from dataclasses import dataclass, field
+from typing import Any
+
+logger = logging.getLogger(__name__)
+
+# Sentinel for "omit temperature entirely" (Kimi: server manages it).
+# Compared by identity (`is OMIT_TEMPERATURE`) by transports.
+OMIT_TEMPERATURE = object()
+
+
+@dataclass
+class ProviderProfile:
+    """Base provider profile — subclass or instantiate with overrides."""
+
+    # ── Identity ─────────────────────────────────────────────
+    name: str
+    api_mode: str = "chat_completions"
+    aliases: tuple[str, ...] = ()
+
+    # ── Human-readable metadata ───────────────────────────────
+    display_name: str = "" # e.g. "GMI Cloud" — shown in picker/labels
+    description: str = "" # e.g. "GMI Cloud (multi-model direct API)" — picker subtitle
+    signup_url: str = "" # e.g. "https://www.gmicloud.ai/" — shown during setup
+
+    # ── Auth & endpoints ─────────────────────────────────────
+    env_vars: tuple[str, ...] = ()
+    base_url: str = ""
+    models_url: str = "" # explicit models endpoint; falls back to {base_url}/models
+    auth_type: str = "api_key" # api_key|oauth_device_code|oauth_external|copilot|aws_sdk
+
+    # ── Model catalog ─────────────────────────────────────────
+    # fallback_models: curated list shown in /model picker when live fetch fails.
+    # Only agentic models that support tool calling should appear here.
+    fallback_models: tuple[str, ...] = ()
+
+    # hostname: base hostname for URL→provider reverse-mapping in model_metadata.py
+    # e.g. "api.gmi-serving.com". Derived from base_url when empty.
+    hostname: str = ""
+
+    # ── Client-level quirks (set once at client construction) ─
+    default_headers: dict[str, str] = field(default_factory=dict)
+
+    # ── Request-level quirks ─────────────────────────────────
+    # Temperature: None = use caller's default, OMIT_TEMPERATURE = don't send
+    fixed_temperature: Any = None
+    default_max_tokens: int | None = None
+    default_aux_model: str = (
+        ""  # cheap model for auxiliary tasks (compression, vision, etc.)
+    )
+    # empty = use main model
+
+    # ── Hooks (override in subclass for complex providers) ───
+
+    def get_hostname(self) -> str:
+        """Return the provider's base hostname for URL-based detection.
+
+        Uses self.hostname if set explicitly, otherwise derives it from base_url.
+        e.g. 'https://api.gmi-serving.com/v1' → 'api.gmi-serving.com'
+        """
+        if self.hostname:
+            return self.hostname
+        if self.base_url:
+            # Local import: only needed on this rarely-taken path.
+            from urllib.parse import urlparse
+            return urlparse(self.base_url).hostname or ""
+        return ""
+
+    def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Provider-specific message preprocessing.
+
+        Called AFTER codex field sanitization, BEFORE developer role swap.
+        Default: pass-through.
+        """
+        return messages
+
+    def build_extra_body(
+        self, *, session_id: str | None = None, **context: Any
+    ) -> dict[str, Any]:
+        """Provider-specific extra_body fields.
+
+        Merged into the API kwargs extra_body. Default: empty dict.
+        """
+        return {}
+
+    def build_api_kwargs_extras(
+        self,
+        *,
+        reasoning_config: dict | None = None,
+        **context: Any,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Provider-specific kwargs split between extra_body and top-level api_kwargs.
+
+        Returns (extra_body_additions, top_level_kwargs).
+        The transport merges extra_body_additions into extra_body, and
+        top_level_kwargs directly into api_kwargs.
+
+        This split exists because some providers put reasoning config in
+        extra_body (OpenRouter: extra_body.reasoning) while others put it
+        as top-level api_kwargs (Kimi: api_kwargs.reasoning_effort).
+
+        Default: ({}, {}).
+        """
+        return {}, {}
+
+    def fetch_models(
+        self,
+        *,
+        api_key: str | None = None,
+        timeout: float = 8.0,
+    ) -> list[str] | None:
+        """Fetch the live model list from the provider's models endpoint.
+
+        Returns a list of model ID strings, or None if the fetch failed or
+        the provider does not support live model listing.
+
+        Resolution order for the endpoint URL:
+          1. self.models_url (explicit override — use when the models
+             endpoint differs from the inference base URL, e.g. OpenRouter
+             exposes a public catalog at /api/v1/models while inference is
+             at /api/v1)
+          2. self.base_url + "/models" (standard OpenAI-compat fallback)
+
+        The default implementation sends Bearer auth when api_key is given
+        and forwards self.default_headers. Override to customise auth, path,
+        response shape, or to return None for providers with no REST catalog.
+
+        Callers must always fall back to the static _PROVIDER_MODELS list
+        when this returns None.
+        """
+        url = (self.models_url or "").strip()
+        if not url:
+            if not self.base_url:
+                return None
+            url = self.base_url.rstrip("/") + "/models"
+
+        # Local imports: only needed when a live fetch actually happens.
+        import json
+        import urllib.request
+
+        req = urllib.request.Request(url)
+        if api_key:
+            req.add_header("Authorization", f"Bearer {api_key}")
+        req.add_header("Accept", "application/json")
+        for k, v in self.default_headers.items():
+            req.add_header(k, v)
+
+        try:
+            with urllib.request.urlopen(req, timeout=timeout) as resp:
+                data = json.loads(resp.read().decode())
+            # Accept both a bare list and the OpenAI {"data": [...]} envelope.
+            items = data if isinstance(data, list) else data.get("data", [])
+            return [m["id"] for m in items if isinstance(m, dict) and "id" in m]
+        except Exception as exc:
+            # Best-effort: any network/parse failure degrades to None.
+            logger.debug("fetch_models(%s): %s", self.name, exc)
+            return None
diff --git a/providers/bedrock.py b/providers/bedrock.py
new file mode 100644
index 0000000000..6fdbbe834d
--- /dev/null
+++ b/providers/bedrock.py
@@ -0,0 +1,29 @@
+"""AWS Bedrock provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class BedrockProfile(ProviderProfile):
+    """AWS Bedrock — no REST /v1/models endpoint; uses AWS SDK."""
+
+    def fetch_models(
+        self,
+        *,
+        api_key: str | None = None,
+        timeout: float = 8.0,
+    ) -> list[str] | None:
+        """Bedrock model listing requires AWS SDK, not a REST call.
+
+        Always returns None so callers use their static fallback list.
+        """
+        return None
+
+
+bedrock = BedrockProfile(
+    name="bedrock",
+    aliases=("aws", "aws-bedrock", "amazon-bedrock", "amazon"),
+    api_mode="bedrock_converse",
+    env_vars=(),  # AWS SDK credentials — not env vars
+    base_url="https://bedrock-runtime.us-east-1.amazonaws.com",
+    auth_type="aws_sdk",
+)
+
+register_provider(bedrock)
diff --git a/providers/copilot.py b/providers/copilot.py
new file mode 100644
index 0000000000..d4409c108d
--- /dev/null
+++ b/providers/copilot.py
@@ -0,0 +1,58 @@
+"""Copilot / GitHub Models provider profile.
+
+Copilot uses per-model api_mode routing:
+ - GPT-5+ / Codex models → codex_responses
+ - Claude models → anthropic_messages
+ - Everything else → chat_completions (this profile covers that subset)
+
+Key quirks for the chat_completions subset:
+ - Editor attribution headers (via copilot_default_headers())
+ - GitHub Models reasoning extra_body (model-catalog gated)
+"""
+
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class CopilotProfile(ProviderProfile):
+    """GitHub Copilot / GitHub Models — editor headers + reasoning."""
+
+    def build_api_kwargs_extras(
+        self,
+        *,
+        model: str | None = None,
+        reasoning_config: dict | None = None,
+        supports_reasoning: bool = False,
+        **ctx,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Gate extra_body.reasoning on the GitHub model catalog.
+
+        Only emits a reasoning effort that the catalog says the model
+        supports; otherwise falls back to "medium". Returns
+        (extra_body_additions, top_level_kwargs) per the base contract.
+        """
+        extra_body: dict[str, Any] = {}
+        if supports_reasoning and model:
+            try:
+                # Local import avoids a hard dependency at module import time.
+                from hermes_cli.models import github_model_reasoning_efforts
+
+                supported_efforts = github_model_reasoning_efforts(model)
+                if supported_efforts and reasoning_config:
+                    effort = reasoning_config.get("effort", "medium")
+                    # Normalize non-standard effort levels to the nearest supported
+                    if effort == "xhigh":
+                        effort = "high"
+                    if effort in supported_efforts:
+                        extra_body["reasoning"] = {"effort": effort}
+                    elif supported_efforts:
+                        # NOTE(review): always true here (guarded above) —
+                        # acts as a plain `else` mapping unknown efforts to medium.
+                        extra_body["reasoning"] = {"effort": "medium"}
+            except Exception:
+                # Best-effort: missing catalog data means no reasoning field.
+                pass
+        return extra_body, {}
+
+
+copilot = CopilotProfile(
+    name="copilot",
+    aliases=("github-copilot", "github-models", "github-model", "github"),
+    env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
+    base_url="https://api.githubcopilot.com",
+    auth_type="copilot",
+)
+
+register_provider(copilot)
diff --git a/providers/copilot_acp.py b/providers/copilot_acp.py
new file mode 100644
index 0000000000..21ec7da2e9
--- /dev/null
+++ b/providers/copilot_acp.py
@@ -0,0 +1,34 @@
+"""GitHub Copilot ACP provider profile.
+
+copilot-acp uses an external ACP subprocess — NOT the standard
+transport. api_mode="copilot_acp" is handled separately in run_agent.py.
+The profile captures auth + endpoint metadata for registry migration.
+"""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class CopilotACPProfile(ProviderProfile):
+    """GitHub Copilot ACP — external process, no REST models endpoint."""
+
+    def fetch_models(
+        self,
+        *,
+        api_key: str | None = None,
+        timeout: float = 8.0,
+    ) -> list[str] | None:
+        """Model listing is handled by the ACP subprocess.
+
+        Always returns None; there is no REST catalog to query.
+        """
+        return None
+
+
+copilot_acp = CopilotACPProfile(
+    name="copilot-acp",
+    aliases=("github-copilot-acp", "copilot-acp-agent"),
+    api_mode="chat_completions",  # ACP subprocess uses chat_completions routing
+    env_vars=(),  # Managed by ACP subprocess
+    base_url="acp://copilot",  # ACP internal scheme
+    auth_type="external_process",
+)
+
+register_provider(copilot_acp)
diff --git a/providers/custom.py b/providers/custom.py
new file mode 100644
index 0000000000..5707571ceb
--- /dev/null
+++ b/providers/custom.py
@@ -0,0 +1,71 @@
+"""Custom / Ollama (local) provider profile.
+
+Covers any endpoint registered as provider="custom", including local
+Ollama instances. Key quirks:
+ - ollama_num_ctx → extra_body.options.num_ctx (local context window)
+ - reasoning_config disabled → extra_body.think = False
+"""
+
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class CustomProfile(ProviderProfile):
+    """Custom/Ollama local provider — think=false and num_ctx support."""
+
+    def build_api_kwargs_extras(
+        self,
+        *,
+        reasoning_config: dict | None = None,
+        ollama_num_ctx: int | None = None,
+        **ctx: Any,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Map Ollama-specific knobs into extra_body.
+
+        ollama_num_ctx → extra_body.options.num_ctx; a disabled
+        reasoning_config → extra_body.think = False. Returns
+        (extra_body_additions, top_level_kwargs) per the base contract.
+        """
+        extra_body: dict[str, Any] = {}
+
+        # Ollama context window
+        if ollama_num_ctx:
+            options = extra_body.get("options", {})
+            options["num_ctx"] = ollama_num_ctx
+            extra_body["options"] = options
+
+        # Disable thinking when reasoning is turned off
+        if reasoning_config and isinstance(reasoning_config, dict):
+            _effort = (reasoning_config.get("effort") or "").strip().lower()
+            _enabled = reasoning_config.get("enabled", True)
+            if _effort == "none" or _enabled is False:
+                extra_body["think"] = False
+
+        return extra_body, {}
+
+    def fetch_models(
+        self,
+        *,
+        api_key: str | None = None,
+        timeout: float = 8.0,
+    ) -> list[str] | None:
+        """Custom/Ollama: base_url is user-configured; fetch if set.
+
+        NOTE: the base implementation already returns None when base_url
+        is empty; the guard here just makes that intent explicit.
+        """
+        if not self.base_url:
+            return None
+        return super().fetch_models(api_key=api_key, timeout=timeout)
+
+
+custom = CustomProfile(
+    name="custom",
+    aliases=(
+        "ollama",
+        "local",
+        "lmstudio",
+        "lm-studio",
+        "lm_studio",
+        "vllm",
+        "llamacpp",
+        "llama.cpp",
+        "llama-cpp",
+    ),
+    env_vars=(),  # No fixed key — custom endpoint
+    base_url="",  # User-configured
+)
+
+register_provider(custom)
diff --git a/providers/deepseek.py b/providers/deepseek.py
new file mode 100644
index 0000000000..59d738f50f
--- /dev/null
+++ b/providers/deepseek.py
@@ -0,0 +1,20 @@
+"""DeepSeek provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Declarative profile — no request-time quirks; base defaults apply.
+deepseek = ProviderProfile(
+    name="deepseek",
+    aliases=("deepseek-chat",),
+    env_vars=("DEEPSEEK_API_KEY",),
+    display_name="DeepSeek",
+    description="DeepSeek — native DeepSeek API",
+    signup_url="https://platform.deepseek.com/",
+    # Curated list shown when the live /models fetch fails.
+    fallback_models=(
+        "deepseek-chat",
+        "deepseek-reasoner",
+    ),
+    base_url="https://api.deepseek.com/v1",
+)
+
+register_provider(deepseek)
diff --git a/providers/gemini.py b/providers/gemini.py
new file mode 100644
index 0000000000..216057fb9f
--- /dev/null
+++ b/providers/gemini.py
@@ -0,0 +1,34 @@
+"""Google Gemini provider profiles.
+
+gemini: Google AI Studio (API key) — uses GeminiNativeClient
+google-gemini-cli: Google Cloud Code Assist (OAuth) — uses GeminiCloudCodeClient
+
+Both report api_mode="chat_completions" but use custom native clients
+that bypass the standard OpenAI transport. The profile captures auth
+and endpoint metadata for auth.py / runtime_provider.py migration.
+"""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# API-key path (Google AI Studio). Actual transport is a custom native
+# client (see module docstring), so api_mode here is nominal.
+gemini = ProviderProfile(
+    name="gemini",
+    aliases=("google", "google-gemini", "google-ai-studio"),
+    api_mode="chat_completions",
+    env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"),
+    base_url="https://generativelanguage.googleapis.com/v1beta",
+    auth_type="api_key",
+    default_aux_model="gemini-3-flash-preview",
+)
+
+# OAuth path (Cloud Code Assist) — no API key env vars by design.
+google_gemini_cli = ProviderProfile(
+    name="google-gemini-cli",
+    aliases=("gemini-cli", "gemini-oauth"),
+    api_mode="chat_completions",
+    env_vars=(),  # OAuth — no API key
+    base_url="cloudcode-pa://google",  # Cloud Code Assist internal scheme
+    auth_type="oauth_external",
+)
+
+register_provider(gemini)
+register_provider(google_gemini_cli)
diff --git a/providers/gmi.py b/providers/gmi.py
new file mode 100644
index 0000000000..a7cc32e552
--- /dev/null
+++ b/providers/gmi.py
@@ -0,0 +1,26 @@
+"""GMI Cloud provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Declarative profile; model IDs use the "vendor/model" slash form.
+gmi = ProviderProfile(
+    name="gmi",
+    aliases=("gmi-cloud", "gmicloud"),
+    display_name="GMI Cloud",
+    description="GMI Cloud — multi-model direct API (slash-form model IDs)",
+    signup_url="https://www.gmicloud.ai/",
+    env_vars=("GMI_API_KEY", "GMI_BASE_URL"),
+    base_url="https://api.gmi-serving.com/v1",
+    auth_type="api_key",
+    default_aux_model="google/gemini-3.1-flash-lite-preview",
+    # Curated list shown when the live /models fetch fails.
+    fallback_models=(
+        "zai-org/GLM-5.1-FP8",
+        "deepseek-ai/DeepSeek-V3.2",
+        "moonshotai/Kimi-K2.5",
+        "google/gemini-3.1-flash-lite-preview",
+        "anthropic/claude-sonnet-4.6",
+        "openai/gpt-5.4",
+    ),
+)
+
+register_provider(gmi)
diff --git a/providers/huggingface.py b/providers/huggingface.py
new file mode 100644
index 0000000000..039d5a1319
--- /dev/null
+++ b/providers/huggingface.py
@@ -0,0 +1,20 @@
+"""Hugging Face provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Declarative profile for the HF inference router (OpenAI-compatible).
+huggingface = ProviderProfile(
+    name="huggingface",
+    aliases=("hf", "hugging-face", "huggingface-hub"),
+    env_vars=("HF_TOKEN",),
+    display_name="HuggingFace",
+    description="HuggingFace Inference API",
+    signup_url="https://huggingface.co/settings/tokens",
+    # Curated list shown when the live /models fetch fails.
+    fallback_models=(
+        "Qwen/Qwen3.5-72B-Instruct",
+        "deepseek-ai/DeepSeek-V3.2",
+    ),
+    base_url="https://router.huggingface.co/v1",
+)
+
+register_provider(huggingface)
diff --git a/providers/kilocode.py b/providers/kilocode.py
new file mode 100644
index 0000000000..23123966aa
--- /dev/null
+++ b/providers/kilocode.py
@@ -0,0 +1,14 @@
+"""Kilo Code provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Declarative profile — Kilo gateway, OpenAI-compatible with Bearer auth.
+kilocode = ProviderProfile(
+    name="kilocode",
+    aliases=("kilo-code", "kilo", "kilo-gateway"),
+    env_vars=("KILOCODE_API_KEY",),
+    base_url="https://api.kilo.ai/api/gateway",
+    default_aux_model="google/gemini-3-flash-preview",
+)
+
+register_provider(kilocode)
diff --git a/providers/kimi.py b/providers/kimi.py
new file mode 100644
index 0000000000..b5cf53a801
--- /dev/null
+++ b/providers/kimi.py
@@ -0,0 +1,71 @@
+"""Kimi / Moonshot provider profiles.
+
+Kimi has dual endpoints:
+ - sk-kimi-* keys → api.kimi.com/coding (Anthropic Messages API)
+ - legacy keys → api.moonshot.ai/v1 (OpenAI chat completions)
+
+This module covers the chat_completions path (/v1 endpoint).
+"""
+
+from typing import Any
+
+from providers import register_provider
+from providers.base import OMIT_TEMPERATURE, ProviderProfile
+
+
+class KimiProfile(ProviderProfile):
+    """Kimi/Moonshot — temperature omitted, thinking + reasoning_effort."""
+
+    def build_api_kwargs_extras(
+        self, *, reasoning_config: dict | None = None, **context
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Kimi uses extra_body.thinking + top-level reasoning_effort.
+
+        Returns (extra_body_additions, top_level_kwargs): thinking state
+        goes into extra_body, reasoning_effort goes top-level.
+        """
+        extra_body: dict[str, Any] = {}
+        top_level: dict[str, Any] = {}
+
+        if not reasoning_config or not isinstance(reasoning_config, dict):
+            # No config → thinking enabled, default effort
+            extra_body["thinking"] = {"type": "enabled"}
+            top_level["reasoning_effort"] = "medium"
+            return extra_body, top_level
+
+        enabled = reasoning_config.get("enabled", True)
+        if enabled is False:
+            # Explicitly disabled → no reasoning_effort at all.
+            extra_body["thinking"] = {"type": "disabled"}
+            return extra_body, top_level
+
+        # Enabled
+        extra_body["thinking"] = {"type": "enabled"}
+        effort = (reasoning_config.get("effort") or "").strip().lower()
+        if effort in ("low", "medium", "high"):
+            top_level["reasoning_effort"] = effort
+        else:
+            # Unknown/unsupported efforts normalise to medium.
+            top_level["reasoning_effort"] = "medium"
+
+        return extra_body, top_level
+
+
+# International endpoint. fixed_temperature=OMIT_TEMPERATURE: the server
+# manages temperature, so the field is not sent at all.
+kimi = KimiProfile(
+    name="kimi-coding",
+    aliases=("kimi", "moonshot", "kimi-for-coding"),
+    env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"),
+    base_url="https://api.moonshot.ai/v1",
+    fixed_temperature=OMIT_TEMPERATURE,
+    default_max_tokens=32000,
+    default_headers={"User-Agent": "hermes-agent/1.0"},
+    default_aux_model="kimi-k2-turbo-preview",
+)
+
+# China endpoint — identical quirks, different host and key env var.
+kimi_cn = KimiProfile(
+    name="kimi-coding-cn",
+    aliases=("kimi-cn", "moonshot-cn"),
+    env_vars=("KIMI_CN_API_KEY",),
+    base_url="https://api.moonshot.cn/v1",
+    fixed_temperature=OMIT_TEMPERATURE,
+    default_max_tokens=32000,
+    default_headers={"User-Agent": "hermes-agent/1.0"},
+    default_aux_model="kimi-k2-turbo-preview",
+)
+
+register_provider(kimi)
+register_provider(kimi_cn)
diff --git a/providers/minimax.py b/providers/minimax.py
new file mode 100644
index 0000000000..8fb106a8bf
--- /dev/null
+++ b/providers/minimax.py
@@ -0,0 +1,31 @@
+"""MiniMax provider profiles (international + China).
+
+Both use anthropic_messages api_mode — their inference_base_url
+ends with /anthropic which triggers auto-detection to anthropic_messages.
+"""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# International endpoint — /anthropic path, Anthropic Messages api_mode.
+minimax = ProviderProfile(
+    name="minimax",
+    aliases=("mini-max",),
+    api_mode="anthropic_messages",
+    env_vars=("MINIMAX_API_KEY",),
+    base_url="https://api.minimax.io/anthropic",
+    auth_type="api_key",
+    default_aux_model="MiniMax-M2.7",
+)
+
+# China endpoint — identical shape, different host and key env var.
+minimax_cn = ProviderProfile(
+    name="minimax-cn",
+    aliases=("minimax-china", "minimax_cn"),
+    api_mode="anthropic_messages",
+    env_vars=("MINIMAX_CN_API_KEY",),
+    base_url="https://api.minimaxi.com/anthropic",
+    auth_type="api_key",
+    default_aux_model="MiniMax-M2.7",
+)
+
+register_provider(minimax)
+register_provider(minimax_cn)
diff --git a/providers/nous.py b/providers/nous.py
new file mode 100644
index 0000000000..f89e56c23a
--- /dev/null
+++ b/providers/nous.py
@@ -0,0 +1,53 @@
+"""Nous Portal provider profile."""
+
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class NousProfile(ProviderProfile):
+    """Nous Portal — product tags, reasoning with Nous-specific omission."""
+
+    def build_extra_body(
+        self, *, session_id: str | None = None, **context
+    ) -> dict[str, Any]:
+        """Attach the product attribution tag to every request."""
+        return {"tags": ["product=hermes-agent"]}
+
+    def build_api_kwargs_extras(
+        self,
+        *,
+        reasoning_config: dict | None = None,
+        supports_reasoning: bool = False,
+        **context,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Nous: passes full reasoning_config, but OMITS when disabled.
+
+        Returns (extra_body_additions, top_level_kwargs); Nous takes the
+        whole reasoning dict in extra_body.
+        """
+        extra_body: dict[str, Any] = {}
+        if supports_reasoning:
+            if reasoning_config is not None:
+                # Copy so later mutation of the caller's dict can't leak in.
+                rc = dict(reasoning_config)
+                if rc.get("enabled") is False:
+                    pass  # Nous omits reasoning when disabled
+                else:
+                    extra_body["reasoning"] = rc
+            else:
+                # No explicit config → reasoning on at medium effort.
+                extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+        return extra_body, {}
+
+
+nous = NousProfile(
+    name="nous",
+    aliases=("nous-portal", "nousresearch"),
+    env_vars=("NOUS_API_KEY",),
+    display_name="Nous Research",
+    description="Nous Research — Hermes model family",
+    signup_url="https://nousresearch.com/",
+    # Curated list shown when the live /models fetch fails.
+    fallback_models=(
+        "hermes-3-405b",
+        "hermes-3-70b",
+    ),
+    base_url="https://inference.nousresearch.com/v1",
+    auth_type="oauth_device_code",
+)
+
+register_provider(nous)
diff --git a/providers/nvidia.py b/providers/nvidia.py
new file mode 100644
index 0000000000..f6fdc550f6
--- /dev/null
+++ b/providers/nvidia.py
@@ -0,0 +1,21 @@
+"""NVIDIA NIM provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Declarative profile. default_max_tokens caps completions at 16k.
+nvidia = ProviderProfile(
+    name="nvidia",
+    aliases=("nvidia-nim",),
+    env_vars=("NVIDIA_API_KEY",),
+    display_name="NVIDIA NIM",
+    description="NVIDIA NIM — accelerated inference",
+    signup_url="https://build.nvidia.com/",
+    # Curated list shown when the live /models fetch fails.
+    fallback_models=(
+        "nvidia/llama-3.1-nemotron-70b-instruct",
+        "nvidia/llama-3.3-70b-instruct",
+    ),
+    base_url="https://integrate.api.nvidia.com/v1",
+    default_max_tokens=16384,
+)
+
+register_provider(nvidia)
diff --git a/providers/ollama_cloud.py b/providers/ollama_cloud.py
new file mode 100644
index 0000000000..f25c442a40
--- /dev/null
+++ b/providers/ollama_cloud.py
@@ -0,0 +1,14 @@
+"""Ollama Cloud provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Hosted Ollama (distinct from the local "custom"/ollama profile).
+ollama_cloud = ProviderProfile(
+    name="ollama-cloud",
+    aliases=("ollama_cloud",),
+    default_aux_model="nemotron-3-nano:30b",
+    env_vars=("OLLAMA_API_KEY",),
+    base_url="https://ollama.com/v1",
+)
+
+register_provider(ollama_cloud)
diff --git a/providers/openai_codex.py b/providers/openai_codex.py
new file mode 100644
index 0000000000..8124b9efe4
--- /dev/null
+++ b/providers/openai_codex.py
@@ -0,0 +1,15 @@
+"""OpenAI Codex (Responses API) provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# OAuth-only profile routed through the Responses-API transport.
+openai_codex = ProviderProfile(
+    name="openai-codex",
+    aliases=("codex", "openai_codex"),
+    api_mode="codex_responses",
+    env_vars=(),  # OAuth external — no API key
+    base_url="https://chatgpt.com/backend-api/codex",
+    auth_type="oauth_external",
+)
+
+register_provider(openai_codex)
diff --git a/providers/opencode.py b/providers/opencode.py
new file mode 100644
index 0000000000..f720e8f5fa
--- /dev/null
+++ b/providers/opencode.py
@@ -0,0 +1,30 @@
+"""OpenCode provider profiles (Zen + Go).
+
+Both use per-model api_mode routing:
+ - OpenCode Zen: Claude → anthropic_messages, GPT-5/Codex → codex_responses,
+ everything else → chat_completions (this profile)
+ - OpenCode Go: MiniMax → anthropic_messages, GLM/Kimi → chat_completions
+ (this profile)
+"""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Zen gateway — covers the chat_completions subset (see module docstring).
+opencode_zen = ProviderProfile(
+    name="opencode-zen",
+    aliases=("opencode", "opencode_zen", "zen"),
+    env_vars=("OPENCODE_ZEN_API_KEY",),
+    base_url="https://opencode.ai/zen/v1",
+    default_aux_model="gemini-3-flash",
+)
+
+# Go gateway — same host, /go path, separate key and aux model.
+opencode_go = ProviderProfile(
+    name="opencode-go",
+    aliases=("opencode_go", "go", "opencode-go-sub"),
+    env_vars=("OPENCODE_GO_API_KEY",),
+    base_url="https://opencode.ai/zen/go/v1",
+    default_aux_model="glm-5",
+)
+
+register_provider(opencode_zen)
+register_provider(opencode_go)
diff --git a/providers/openrouter.py b/providers/openrouter.py
new file mode 100644
index 0000000000..6aad8fc65d
--- /dev/null
+++ b/providers/openrouter.py
@@ -0,0 +1,86 @@
+"""OpenRouter provider profile."""
+
+import logging
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+logger = logging.getLogger(__name__)
+
+_CACHE: list[str] | None = None
+
+
+class OpenRouterProfile(ProviderProfile):
+ """OpenRouter aggregator — provider preferences, reasoning config passthrough."""
+
+ def fetch_models(
+ self,
+ *,
+ api_key: str | None = None,
+ timeout: float = 8.0,
+ ) -> list[str] | None:
+ """Fetch from public OpenRouter catalog — no auth required.
+
+ Note: Tool-call capability filtering is applied by hermes_cli/models.py
+ via fetch_openrouter_models() → _openrouter_model_supports_tools(), not
+ here. The picker early-returns via the dedicated openrouter path before
+ reaching this method, so filtering here would be unreachable.
+ """
+ global _CACHE # noqa: PLW0603
+ if _CACHE is not None:
+ return _CACHE
+ try:
+ result = super().fetch_models(api_key=None, timeout=timeout)
+ if result is not None:
+ _CACHE = result
+ return result
+ except Exception as exc:
+ logger.debug("fetch_models(openrouter): %s", exc)
+ return None
+
+ def build_extra_body(
+ self, *, session_id: str | None = None, **context: Any
+ ) -> dict[str, Any]:
+ body: dict[str, Any] = {}
+ prefs = context.get("provider_preferences")
+ if prefs:
+ body["provider"] = prefs
+ return body
+
+ def build_api_kwargs_extras(
+ self,
+ *,
+ reasoning_config: dict | None = None,
+ supports_reasoning: bool = False,
+ **context: Any,
+ ) -> tuple[dict[str, Any], dict[str, Any]]:
+ """OpenRouter passes the full reasoning_config dict as extra_body.reasoning."""
+ extra_body: dict[str, Any] = {}
+ if supports_reasoning:
+ if reasoning_config is not None:
+ extra_body["reasoning"] = dict(reasoning_config)
+ else:
+ extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+ return extra_body, {}
+
+
+openrouter = OpenRouterProfile(
+ name="openrouter",
+ aliases=("or",),
+ env_vars=("OPENROUTER_API_KEY",),
+ display_name="OpenRouter",
+ description="OpenRouter — unified API for 200+ models",
+ signup_url="https://openrouter.ai/keys",
+ base_url="https://openrouter.ai/api/v1",
+ models_url="https://openrouter.ai/api/v1/models",
+ fallback_models=(
+ "anthropic/claude-sonnet-4.6",
+ "openai/gpt-5.4",
+ "deepseek/deepseek-chat",
+ "google/gemini-3-flash-preview",
+ "qwen/qwen3-plus",
+ ),
+)
+
+register_provider(openrouter)
diff --git a/providers/qwen.py b/providers/qwen.py
new file mode 100644
index 0000000000..a6ba29f76c
--- /dev/null
+++ b/providers/qwen.py
@@ -0,0 +1,82 @@
+"""Qwen Portal provider profile."""
+
+import copy
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class QwenProfile(ProviderProfile):
+    """Qwen Portal — message normalization, vl_high_resolution, metadata top-level."""
+
+    def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
+        """Normalize content to list-of-dicts format.
+
+        Inject cache_control on system message.
+
+        Matches the behavior of run_agent.py:_qwen_prepare_chat_messages().
+        """
+        # Deep copy: never mutate the caller's message list in place.
+        prepared = copy.deepcopy(messages)
+        if not prepared:
+            return prepared
+
+        for msg in prepared:
+            if not isinstance(msg, dict):
+                continue
+            content = msg.get("content")
+            if isinstance(content, str):
+                # Plain string → single text part.
+                msg["content"] = [{"type": "text", "text": content}]
+            elif isinstance(content, list):
+                # Mixed list → wrap bare strings; keep dict parts as-is.
+                normalized_parts = []
+                for part in content:
+                    if isinstance(part, str):
+                        normalized_parts.append({"type": "text", "text": part})
+                    elif isinstance(part, dict):
+                        normalized_parts.append(part)
+                # Only replace when something survived normalization.
+                if normalized_parts:
+                    msg["content"] = normalized_parts
+
+        # Inject cache_control on the last part of the system message.
+        for msg in prepared:
+            if isinstance(msg, dict) and msg.get("role") == "system":
+                content = msg.get("content")
+                if (
+                    isinstance(content, list)
+                    and content
+                    and isinstance(content[-1], dict)
+                ):
+                    content[-1]["cache_control"] = {"type": "ephemeral"}
+                break
+
+        return prepared
+
+    def build_extra_body(
+        self, *, session_id: str | None = None, **context
+    ) -> dict[str, Any]:
+        """Always request high-resolution handling for vision inputs."""
+        return {"vl_high_resolution_images": True}
+
+    def build_api_kwargs_extras(
+        self,
+        *,
+        reasoning_config: dict | None = None,
+        qwen_session_metadata: dict | None = None,
+        **context,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Qwen metadata goes to top-level api_kwargs, not extra_body."""
+        top_level: dict[str, Any] = {}
+        if qwen_session_metadata:
+            top_level["metadata"] = qwen_session_metadata
+        return {}, top_level
+
+
+qwen = QwenProfile(
+    name="qwen-oauth",
+    aliases=("qwen", "qwen-portal", "qwen-cli"),
+    env_vars=("QWEN_API_KEY",),
+    base_url="https://portal.qwen.ai/v1",
+    auth_type="oauth_external",
+    default_max_tokens=65536,
+)
+
+register_provider(qwen)
diff --git a/providers/stepfun.py b/providers/stepfun.py
new file mode 100644
index 0000000000..1ec92cd8be
--- /dev/null
+++ b/providers/stepfun.py
@@ -0,0 +1,14 @@
+"""StepFun provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Declarative profile — StepFun coding-plan endpoint, Bearer auth.
+stepfun = ProviderProfile(
+    name="stepfun",
+    aliases=("step", "stepfun-coding-plan"),
+    default_aux_model="step-3.5-flash",
+    env_vars=("STEPFUN_API_KEY",),
+    base_url="https://api.stepfun.ai/step_plan/v1",
+)
+
+register_provider(stepfun)
diff --git a/providers/vercel.py b/providers/vercel.py
new file mode 100644
index 0000000000..9d01ab9824
--- /dev/null
+++ b/providers/vercel.py
@@ -0,0 +1,43 @@
+"""Vercel AI Gateway provider profile.
+
+AI Gateway routes to multiple backends. Hermes sends attribution
+headers and full reasoning config passthrough.
+"""
+
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class VercelAIGatewayProfile(ProviderProfile):
+    """Vercel AI Gateway — attribution headers + reasoning passthrough."""
+
+    def build_api_kwargs_extras(
+        self,
+        *,
+        reasoning_config: dict | None = None,
+        supports_reasoning: bool = True,
+        **ctx: Any,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Pass the full reasoning_config through as extra_body.reasoning.
+
+        Unlike most profiles, supports_reasoning defaults to True here.
+        Returns (extra_body_additions, top_level_kwargs).
+        """
+        extra_body: dict[str, Any] = {}
+        if supports_reasoning and reasoning_config is not None:
+            extra_body["reasoning"] = dict(reasoning_config)
+        elif supports_reasoning:
+            # No explicit config → reasoning on at medium effort.
+            extra_body["reasoning"] = {"enabled": True, "effort": "medium"}
+        return extra_body, {}
+
+
+vercel = VercelAIGatewayProfile(
+    name="ai-gateway",
+    aliases=("vercel", "vercel-ai-gateway", "ai_gateway", "aigateway"),
+    env_vars=("AI_GATEWAY_API_KEY",),
+    base_url="https://ai-gateway.vercel.sh/v1",
+    # Attribution headers sent on every request through the gateway.
+    default_headers={
+        "HTTP-Referer": "https://hermes-agent.nousresearch.com",
+        "X-Title": "Hermes Agent",
+    },
+    default_aux_model="google/gemini-3-flash",
+)
+
+register_provider(vercel)
diff --git a/providers/xai.py b/providers/xai.py
new file mode 100644
index 0000000000..8d73ae0199
--- /dev/null
+++ b/providers/xai.py
@@ -0,0 +1,15 @@
+"""xAI (Grok) provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# API-key profile routed through the Responses-API transport.
+xai = ProviderProfile(
+    name="xai",
+    aliases=("grok", "x-ai", "x.ai"),
+    api_mode="codex_responses",
+    env_vars=("XAI_API_KEY",),
+    base_url="https://api.x.ai/v1",
+    auth_type="api_key",
+)
+
+register_provider(xai)
diff --git a/providers/xiaomi.py b/providers/xiaomi.py
new file mode 100644
index 0000000000..2e0c8db7db
--- /dev/null
+++ b/providers/xiaomi.py
@@ -0,0 +1,13 @@
+"""Xiaomi MiMo provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+# Declarative profile — OpenAI-compatible endpoint, Bearer auth.
+xiaomi = ProviderProfile(
+    name="xiaomi",
+    aliases=("mimo", "xiaomi-mimo"),
+    env_vars=("XIAOMI_API_KEY",),
+    base_url="https://api.xiaomimimo.com/v1",
+)
+
+register_provider(xiaomi)
diff --git a/providers/zai.py b/providers/zai.py
new file mode 100644
index 0000000000..70aa8704d1
--- /dev/null
+++ b/providers/zai.py
@@ -0,0 +1,21 @@
+"""ZAI / GLM provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+zai = ProviderProfile(
+ name="zai",
+ aliases=("glm", "z-ai", "z.ai", "zhipu"),
+ env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"),
+ display_name="Z.AI (GLM)",
+ description="Z.AI / GLM — Zhipu AI models",
+ signup_url="https://z.ai/",
+ fallback_models=(
+ "glm-5",
+ "glm-4-9b",
+ ),
+ base_url="https://api.z.ai/api/paas/v4",
+ default_aux_model="glm-4.5-flash",
+)
+
+register_provider(zai)
diff --git a/pyproject.toml b/pyproject.toml
index 4b7e8816ac..e73e543e00 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -137,7 +137,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector
hermes_cli = ["web_dist/**/*"]
[tool.setuptools.packages.find]
-include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"]
+include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"]
[tool.pytest.ini_options]
testpaths = ["tests"]
diff --git a/run_agent.py b/run_agent.py
index 3f2b783082..5e73d5261f 100644
--- a/run_agent.py
+++ b/run_agent.py
@@ -1371,6 +1371,17 @@ class AIAgent:
elif base_url_host_matches(effective_base, "chatgpt.com"):
from agent.auxiliary_client import _codex_cloudflare_headers
client_kwargs["default_headers"] = _codex_cloudflare_headers(api_key)
+ elif "default_headers" not in client_kwargs:
+ # Fall back to profile.default_headers for providers that
+ # declare custom headers (e.g. Vercel AI Gateway attribution,
+ # Kimi User-Agent on non-kimi.com endpoints).
+ try:
+ from providers import get_provider_profile as _gpf
+ _ph = _gpf(self.provider)
+ if _ph and _ph.default_headers:
+ client_kwargs["default_headers"] = dict(_ph.default_headers)
+ except Exception:
+ pass
else:
# No explicit creds — use the centralized provider router
from agent.auxiliary_client import resolve_provider_client
@@ -5037,7 +5048,7 @@ class AIAgent:
_validate_proxy_env_urls()
_validate_base_url(client_kwargs.get("base_url"))
if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"):
- from agent.copilot_acp_client import CopilotACPClient
+ from acp_adapter.copilot_client import CopilotACPClient
client = CopilotACPClient(**client_kwargs)
logger.info(
@@ -5726,7 +5737,19 @@ class AIAgent:
self._client_kwargs.get("api_key", "")
)
else:
- self._client_kwargs.pop("default_headers", None)
+ # No URL-specific headers — check profile.default_headers before clearing.
+ _ph_headers = None
+ try:
+ from providers import get_provider_profile as _gpf2
+ _ph2 = _gpf2(self.provider)
+ if _ph2 and _ph2.default_headers:
+ _ph_headers = dict(_ph2.default_headers)
+ except Exception:
+ pass
+ if _ph_headers:
+ self._client_kwargs["default_headers"] = _ph_headers
+ else:
+ self._client_kwargs.pop("default_headers", None)
def _swap_credential(self, entry) -> None:
runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
@@ -7857,66 +7880,79 @@ class AIAgent:
# ── chat_completions (default) ─────────────────────────────────────
_ct = self._get_transport()
- # Provider detection flags
- _is_qwen = self._is_qwen_portal()
- _is_or = self._is_openrouter_url()
- _is_gh = (
- base_url_host_matches(self._base_url_lower, "models.github.ai")
- or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com")
- )
- _is_nous = "nousresearch" in self._base_url_lower
- _is_nvidia = "integrate.api.nvidia.com" in self._base_url_lower
- _is_kimi = (
- base_url_host_matches(self.base_url, "api.kimi.com")
- or base_url_host_matches(self.base_url, "moonshot.ai")
- or base_url_host_matches(self.base_url, "moonshot.cn")
- )
-
- # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE
- # sentinel (temperature omitted entirely), a numeric override, or None.
+ # ── Provider profile path (all chat_completions providers) ─────────
+ # Profiles handle per-provider quirks via hooks. We compute the shared
+ # per-call context here and pass it through so hooks can use it.
try:
- from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE
- _ft = _fixed_temperature_for_model(self.model, self.base_url)
- _omit_temp = _ft is OMIT_TEMPERATURE
- _fixed_temp = _ft if not _omit_temp else None
+ from providers import get_provider_profile
+ _profile = get_provider_profile(self.provider)
except Exception:
- _omit_temp = False
- _fixed_temp = None
+ _profile = None
- # Provider preferences (OpenRouter-specific)
- _prefs: Dict[str, Any] = {}
- if self.providers_allowed:
- _prefs["only"] = self.providers_allowed
- if self.providers_ignored:
- _prefs["ignore"] = self.providers_ignored
- if self.providers_order:
- _prefs["order"] = self.providers_order
- if self.provider_sort:
- _prefs["sort"] = self.provider_sort
- if self.provider_require_parameters:
- _prefs["require_parameters"] = True
- if self.provider_data_collection:
- _prefs["data_collection"] = self.provider_data_collection
+ if _profile:
+ _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
+ if _ephemeral_out is not None:
+ self._ephemeral_max_output_tokens = None
- # Anthropic max output for Claude on OpenRouter/Nous
- _ant_max = None
- if (_is_or or _is_nous) and "claude" in (self.model or "").lower():
- try:
- from agent.anthropic_adapter import _get_anthropic_max_output
- _ant_max = _get_anthropic_max_output(self.model)
- except Exception:
- pass # fail open — let the proxy pick its default
+ # Per-call context for profile hooks — mirrors the legacy flag block.
+ # Computed here so profiles receive live per-call values (not stale).
+ _prefs: Dict[str, Any] = {}
+ if self.providers_allowed:
+ _prefs["only"] = self.providers_allowed
+ if self.providers_ignored:
+ _prefs["ignore"] = self.providers_ignored
+ if self.providers_order:
+ _prefs["order"] = self.providers_order
+ if self.provider_sort:
+ _prefs["sort"] = self.provider_sort
+ if self.provider_require_parameters:
+ _prefs["require_parameters"] = True
+ if self.provider_data_collection:
+ _prefs["data_collection"] = self.provider_data_collection
- # Qwen session metadata precomputed here (promptId is per-call random)
- _qwen_meta = None
- if _is_qwen:
- _qwen_meta = {
- "sessionId": self.session_id or "hermes",
- "promptId": str(uuid.uuid4()),
- }
+ _is_or = self._is_openrouter_url()
+ _is_nous = "nousresearch" in self._base_url_lower
+ _ant_max = None
+ if (_is_or or _is_nous) and "claude" in (self.model or "").lower():
+ try:
+ from agent.anthropic_adapter import _get_anthropic_max_output
+ _ant_max = _get_anthropic_max_output(self.model)
+ except Exception:
+ pass
- # Ephemeral max output override — consume immediately so the next
- # turn doesn't inherit it.
+ _is_qwen = self._is_qwen_portal()
+ _qwen_meta = None
+ if _is_qwen:
+ _qwen_meta = {
+ "sessionId": self.session_id or "hermes",
+ "promptId": str(uuid.uuid4()),
+ }
+
+ return _ct.build_kwargs(
+ model=self.model,
+ messages=api_messages,
+ tools=self.tools,
+ timeout=self._resolved_api_call_timeout(),
+ max_tokens=self.max_tokens,
+ ephemeral_max_output_tokens=_ephemeral_out,
+ max_tokens_param_fn=self._max_tokens_param,
+ reasoning_config=self.reasoning_config,
+ request_overrides=self.request_overrides,
+ session_id=getattr(self, "session_id", None),
+ provider_profile=_profile,
+ ollama_num_ctx=self._ollama_num_ctx,
+ # Context forwarded to profile hooks:
+ provider_preferences=_prefs or None,
+ anthropic_max_output=_ant_max,
+ supports_reasoning=self._supports_reasoning_extra_body(),
+ qwen_session_metadata=_qwen_meta,
+ )
+
+ # ── Legacy flag path ────────────────────────────────────────────
+ # Reached only when get_provider_profile() returns None — i.e. a
+ # completely unknown provider not in providers/ registry.
+ # Best-effort: send a clean chat_completions request with no
+ # provider-specific quirks.
_ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None)
if _ephemeral_out is not None:
self._ephemeral_max_output_tokens = None
@@ -7935,24 +7971,7 @@ class AIAgent:
reasoning_config=self.reasoning_config,
request_overrides=self.request_overrides,
session_id=getattr(self, "session_id", None),
- model_lower=(self.model or "").lower(),
- is_openrouter=_is_or,
- is_nous=_is_nous,
- is_qwen_portal=_is_qwen,
- is_github_models=_is_gh,
- is_nvidia_nim=_is_nvidia,
- is_kimi=_is_kimi,
- is_custom_provider=self.provider == "custom",
ollama_num_ctx=self._ollama_num_ctx,
- provider_preferences=_prefs or None,
- qwen_prepare_fn=self._qwen_prepare_chat_messages if _is_qwen else None,
- qwen_prepare_inplace_fn=self._qwen_prepare_chat_messages_inplace if _is_qwen else None,
- qwen_session_metadata=_qwen_meta,
- fixed_temperature=_fixed_temp,
- omit_temperature=_omit_temp,
- supports_reasoning=self._supports_reasoning_extra_body(),
- github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None,
- anthropic_max_output=_ant_max,
)
def _supports_reasoning_extra_body(self) -> bool:
diff --git a/tests/agent/test_copilot_acp_client.py b/tests/agent/test_copilot_acp_client.py
index 63c87fdabd..584330ca79 100644
--- a/tests/agent/test_copilot_acp_client.py
+++ b/tests/agent/test_copilot_acp_client.py
@@ -10,7 +10,7 @@ import unittest
from pathlib import Path
from unittest.mock import patch
-from agent.copilot_acp_client import CopilotACPClient
+from acp_adapter.copilot_client import CopilotACPClient
class _FakeProcess:
@@ -100,7 +100,7 @@ class CopilotACPClientSafetyTests(unittest.TestCase):
target = home / ".ssh" / "id_rsa"
target.parent.mkdir(parents=True, exist_ok=True)
- with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True):
+ with patch("acp_adapter.copilot_client.is_write_denied", return_value=True, create=True):
response = self._dispatch(
{
"jsonrpc": "2.0",
diff --git a/tests/agent/test_minimax_provider.py b/tests/agent/test_minimax_provider.py
index 9ae865d57e..329500917d 100644
--- a/tests/agent/test_minimax_provider.py
+++ b/tests/agent/test_minimax_provider.py
@@ -71,17 +71,17 @@ class TestMinimaxThinkingSupport:
class TestMinimaxAuxModel:
- """Verify auxiliary model is standard (not highspeed)."""
+ """Verify auxiliary model is standard (not highspeed) — now reads from profiles."""
def test_minimax_aux_is_standard(self):
- from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
- assert _API_KEY_PROVIDER_AUX_MODELS["minimax"] == "MiniMax-M2.7"
- assert _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] == "MiniMax-M2.7"
+ from agent.auxiliary_client import _get_aux_model_for_provider
+ assert _get_aux_model_for_provider("minimax") == "MiniMax-M2.7"
+ assert _get_aux_model_for_provider("minimax-cn") == "MiniMax-M2.7"
def test_minimax_aux_not_highspeed(self):
- from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
- assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax"]
- assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"]
+ from agent.auxiliary_client import _get_aux_model_for_provider
+ assert "highspeed" not in _get_aux_model_for_provider("minimax")
+ assert "highspeed" not in _get_aux_model_for_provider("minimax-cn")
class TestMinimaxBetaHeaders:
diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py
index 4adf9f72e5..5a9bf9ca89 100644
--- a/tests/agent/transports/test_chat_completions.py
+++ b/tests/agent/transports/test_chat_completions.py
@@ -73,17 +73,21 @@ class TestChatCompletionsBuildKwargs:
assert kw["tools"] == tools
def test_openrouter_provider_prefs(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("openrouter")
msgs = [{"role": "user", "content": "Hi"}]
kw = transport.build_kwargs(
model="gpt-4o", messages=msgs,
- is_openrouter=True,
+ provider_profile=profile,
provider_preferences={"only": ["openai"]},
)
assert kw["extra_body"]["provider"] == {"only": ["openai"]}
def test_nous_tags(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("nous")
msgs = [{"role": "user", "content": "Hi"}]
- kw = transport.build_kwargs(model="gpt-4o", messages=msgs, is_nous=True)
+ kw = transport.build_kwargs(model="gpt-4o", messages=msgs, provider_profile=profile)
assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
def test_reasoning_default(self, transport):
@@ -95,29 +99,36 @@ class TestChatCompletionsBuildKwargs:
assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"}
def test_nous_omits_disabled_reasoning(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("nous")
msgs = [{"role": "user", "content": "Hi"}]
kw = transport.build_kwargs(
model="gpt-4o", messages=msgs,
+ provider_profile=profile,
supports_reasoning=True,
- is_nous=True,
reasoning_config={"enabled": False},
)
# Nous rejects enabled=false; reasoning omitted entirely
assert "reasoning" not in kw.get("extra_body", {})
def test_ollama_num_ctx(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("custom")
msgs = [{"role": "user", "content": "Hi"}]
kw = transport.build_kwargs(
model="llama3", messages=msgs,
+ provider_profile=profile,
ollama_num_ctx=32768,
)
assert kw["extra_body"]["options"]["num_ctx"] == 32768
def test_custom_think_false(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("custom")
msgs = [{"role": "user", "content": "Hi"}]
kw = transport.build_kwargs(
model="qwen3", messages=msgs,
- is_custom_provider=True,
+ provider_profile=profile,
reasoning_config={"effort": "none"},
)
assert kw["extra_body"]["think"] is False
@@ -142,23 +153,29 @@ class TestChatCompletionsBuildKwargs:
assert kw["max_tokens"] == 2048
def test_nvidia_default_max_tokens(self, transport):
+ """NVIDIA max_tokens=16384 is now set via ProviderProfile, not legacy flag."""
+ from providers import get_provider_profile
+
+ profile = get_provider_profile("nvidia")
msgs = [{"role": "user", "content": "Hi"}]
kw = transport.build_kwargs(
- model="glm-4.7", messages=msgs,
- is_nvidia_nim=True,
+ model="nvidia/llama-3.1-405b-instruct",
+ messages=msgs,
max_tokens_param_fn=lambda n: {"max_tokens": n},
+ provider_profile=profile,
)
- # NVIDIA default: 16384
assert kw["max_tokens"] == 16384
def test_qwen_default_max_tokens(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("qwen-oauth")
msgs = [{"role": "user", "content": "Hi"}]
kw = transport.build_kwargs(
model="qwen3-coder-plus", messages=msgs,
- is_qwen_portal=True,
+ provider_profile=profile,
max_tokens_param_fn=lambda n: {"max_tokens": n},
)
- # Qwen default: 65536
+ # Qwen default: 65536 from profile.default_max_tokens
assert kw["max_tokens"] == 65536
def test_anthropic_max_output_for_claude_on_aggregator(self, transport):
@@ -181,14 +198,23 @@ class TestChatCompletionsBuildKwargs:
assert kw["service_tier"] == "priority"
def test_fixed_temperature(self, transport):
+ """Fixed temperature is now set via ProviderProfile.fixed_temperature."""
+ from providers.base import ProviderProfile
msgs = [{"role": "user", "content": "Hi"}]
- kw = transport.build_kwargs(model="gpt-4o", messages=msgs, fixed_temperature=0.6)
+ kw = transport.build_kwargs(
+ model="gpt-4o", messages=msgs,
+ provider_profile=ProviderProfile(name="_t", fixed_temperature=0.6),
+ )
assert kw["temperature"] == 0.6
def test_omit_temperature(self, transport):
+ """Omit temperature is set via ProviderProfile with OMIT_TEMPERATURE sentinel."""
+ from providers.base import ProviderProfile, OMIT_TEMPERATURE
msgs = [{"role": "user", "content": "Hi"}]
- kw = transport.build_kwargs(model="gpt-4o", messages=msgs, omit_temperature=True, fixed_temperature=0.5)
- # omit wins
+ kw = transport.build_kwargs(
+ model="gpt-4o", messages=msgs,
+ provider_profile=ProviderProfile(name="_t", fixed_temperature=OMIT_TEMPERATURE),
+ )
assert "temperature" not in kw
@@ -196,18 +222,22 @@ class TestChatCompletionsKimi:
"""Regression tests for the Kimi/Moonshot quirks migrated into the transport."""
def test_kimi_max_tokens_default(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("kimi-coding")
kw = transport.build_kwargs(
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
- is_kimi=True,
+ provider_profile=profile,
max_tokens_param_fn=lambda n: {"max_tokens": n},
)
- # Kimi CLI default: 32000
+ # Kimi CLI default: 32000 from KimiProfile.default_max_tokens
assert kw["max_tokens"] == 32000
def test_kimi_reasoning_effort_top_level(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("kimi-coding")
kw = transport.build_kwargs(
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
- is_kimi=True,
+ provider_profile=profile,
reasoning_config={"effort": "high"},
max_tokens_param_fn=lambda n: {"max_tokens": n},
)
@@ -225,17 +255,21 @@ class TestChatCompletionsKimi:
assert "reasoning_effort" not in kw
def test_kimi_thinking_enabled_extra_body(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("kimi-coding")
kw = transport.build_kwargs(
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
- is_kimi=True,
+ provider_profile=profile,
max_tokens_param_fn=lambda n: {"max_tokens": n},
)
assert kw["extra_body"]["thinking"] == {"type": "enabled"}
def test_kimi_thinking_disabled_extra_body(self, transport):
+ from providers import get_provider_profile
+ profile = get_provider_profile("kimi-coding")
kw = transport.build_kwargs(
model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
- is_kimi=True,
+ provider_profile=profile,
reasoning_config={"enabled": False},
max_tokens_param_fn=lambda n: {"max_tokens": n},
)
diff --git a/tests/hermes_cli/test_gmi_provider.py b/tests/hermes_cli/test_gmi_provider.py
index d3b8c1d7aa..0b9363e675 100644
--- a/tests/hermes_cli/test_gmi_provider.py
+++ b/tests/hermes_cli/test_gmi_provider.py
@@ -269,9 +269,9 @@ class TestGmiModelMetadata:
class TestGmiAuxiliary:
def test_aux_default_model(self):
- from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS
+ from agent.auxiliary_client import _get_aux_model_for_provider
- assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "google/gemini-3.1-flash-lite-preview"
+ assert _get_aux_model_for_provider("gmi") == "google/gemini-3.1-flash-lite-preview"
def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch):
monkeypatch.setenv("GMI_API_KEY", "gmi-test-key")
diff --git a/tests/providers/__init__.py b/tests/providers/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/tests/providers/test_e2e_wiring.py b/tests/providers/test_e2e_wiring.py
new file mode 100644
index 0000000000..424dad69bc
--- /dev/null
+++ b/tests/providers/test_e2e_wiring.py
@@ -0,0 +1,118 @@
+"""E2E tests: verify the profile-driven build_kwargs path produces correct output.
+
+These tests exercise the profile path of build_kwargs directly on the transport,
+without importing run_agent (which would cause xdist worker contamination).
+"""
+
+import pytest
+from agent.transports.chat_completions import ChatCompletionsTransport
+from providers import get_provider_profile
+
+
+@pytest.fixture
+def transport():
+ return ChatCompletionsTransport()
+
+
+def _msgs():
+ return [{"role": "user", "content": "hi"}]
+
+
+class TestNvidiaProfileWiring:
+ def test_nvidia_gets_default_max_tokens(self, transport):
+ profile = get_provider_profile("nvidia")
+ kwargs = transport.build_kwargs(
+ model="nvidia/llama-3.1-nemotron-70b-instruct",
+ messages=_msgs(),
+ tools=None,
+ provider_profile=profile,
+ max_tokens=None,
+ max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
+ timeout=300,
+ reasoning_config=None,
+ request_overrides=None,
+ session_id="test",
+ ollama_num_ctx=None,
+ )
+ # NVIDIA profile sets default_max_tokens=16384
+ assert kwargs.get("max_tokens") == 16384
+
+ def test_nvidia_nim_alias(self, transport):
+ profile = get_provider_profile("nvidia-nim")
+ assert profile is not None
+ assert profile.name == "nvidia"
+ assert profile.default_max_tokens == 16384
+
+ def test_nvidia_model_passed(self, transport):
+ profile = get_provider_profile("nvidia")
+ kwargs = transport.build_kwargs(
+ model="nvidia/test-model",
+ messages=_msgs(),
+ tools=None,
+ provider_profile=profile,
+ max_tokens=None,
+ max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
+ timeout=300,
+ reasoning_config=None,
+ request_overrides=None,
+ session_id="test",
+ ollama_num_ctx=None,
+ )
+ assert kwargs["model"] == "nvidia/test-model"
+
+ def test_nvidia_messages_passed(self, transport):
+ profile = get_provider_profile("nvidia")
+ msgs = _msgs()
+ kwargs = transport.build_kwargs(
+ model="nvidia/test",
+ messages=msgs,
+ tools=None,
+ provider_profile=profile,
+ max_tokens=None,
+ max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
+ timeout=300,
+ reasoning_config=None,
+ request_overrides=None,
+ session_id="test",
+ ollama_num_ctx=None,
+ )
+ assert kwargs["messages"] == msgs
+
+
+class TestDeepSeekProfileWiring:
+ def test_deepseek_no_forced_max_tokens(self, transport):
+ profile = get_provider_profile("deepseek")
+ kwargs = transport.build_kwargs(
+ model="deepseek-chat",
+ messages=_msgs(),
+ tools=None,
+ provider_profile=profile,
+ max_tokens=None,
+ max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
+ timeout=300,
+ reasoning_config=None,
+ request_overrides=None,
+ session_id="test",
+ ollama_num_ctx=None,
+ )
+ # DeepSeek has no default_max_tokens
+ assert kwargs["model"] == "deepseek-chat"
+ assert kwargs.get("max_tokens") is None or "max_tokens" not in kwargs
+
+ def test_deepseek_messages_passed(self, transport):
+ profile = get_provider_profile("deepseek")
+ msgs = _msgs()
+ kwargs = transport.build_kwargs(
+ model="deepseek-chat",
+ messages=msgs,
+ tools=None,
+ provider_profile=profile,
+ max_tokens=None,
+ max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
+ timeout=300,
+ reasoning_config=None,
+ request_overrides=None,
+ session_id="test",
+ ollama_num_ctx=None,
+ )
+ assert kwargs["messages"] == msgs
diff --git a/tests/providers/test_profile_wiring.py b/tests/providers/test_profile_wiring.py
new file mode 100644
index 0000000000..9096c82b6a
--- /dev/null
+++ b/tests/providers/test_profile_wiring.py
@@ -0,0 +1,290 @@
+"""Profile-path regression tests for build_kwargs output.
+
+Each test calls build_kwargs via provider profiles (canonical name and alias)
+and asserts the outputs agree, pinning behavior formerly driven by legacy flags.
+"""
+
+import pytest
+from agent.transports.chat_completions import ChatCompletionsTransport
+from providers import get_provider_profile
+
+
+@pytest.fixture
+def transport():
+ return ChatCompletionsTransport()
+
+
+def _msgs():
+ return [{"role": "user", "content": "hello"}]
+
+
+def _max_tokens_fn(n):
+ return {"max_completion_tokens": n}
+
+
+class TestNvidiaProfileParity:
+ def test_max_tokens_match(self, transport):
+ """NVIDIA profile sets max_tokens=16384; legacy flag is removed."""
+ profile = transport.build_kwargs(
+ model="nvidia/nemotron", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("nvidia"),
+ max_tokens_param_fn=_max_tokens_fn,
+ )
+ assert profile["max_completion_tokens"] == 16384
+
+
+class TestKimiProfileParity:
+ def test_temperature_omitted(self, transport):
+ legacy = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi-coding"), omit_temperature=True,
+ )
+ profile = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi"),
+ )
+ assert "temperature" not in legacy
+ assert "temperature" not in profile
+
+ def test_max_tokens(self, transport):
+ legacy = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi-coding"), max_tokens_param_fn=_max_tokens_fn,
+ )
+ profile = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi"),
+ max_tokens_param_fn=_max_tokens_fn,
+ )
+ assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 32000
+
+ def test_thinking_enabled(self, transport):
+ rc = {"enabled": True, "effort": "high"}
+ legacy = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi-coding"), reasoning_config=rc,
+ )
+ profile = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi"),
+ reasoning_config=rc,
+ )
+ assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"]
+ assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "high"
+
+ def test_thinking_disabled(self, transport):
+ rc = {"enabled": False}
+ legacy = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi-coding"), reasoning_config=rc,
+ )
+ profile = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi"),
+ reasoning_config=rc,
+ )
+ assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"]
+ assert profile["extra_body"]["thinking"]["type"] == "disabled"
+ assert "reasoning_effort" not in profile
+ assert "reasoning_effort" not in legacy
+
+ def test_reasoning_effort_default(self, transport):
+ rc = {"enabled": True}
+ legacy = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi-coding"), reasoning_config=rc,
+ )
+ profile = transport.build_kwargs(
+ model="kimi-k2", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("kimi"),
+ reasoning_config=rc,
+ )
+ assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "medium"
+
+
+class TestOpenRouterProfileParity:
+ def test_provider_preferences(self, transport):
+ prefs = {"allow": ["anthropic"]}
+ legacy = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("openrouter"), provider_preferences=prefs,
+ )
+ profile = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ provider_preferences=prefs,
+ )
+ assert profile["extra_body"]["provider"] == legacy["extra_body"]["provider"]
+
+ def test_reasoning_full_config(self, transport):
+ rc = {"enabled": True, "effort": "high"}
+ legacy = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("openrouter"), supports_reasoning=True, reasoning_config=rc,
+ )
+ profile = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ supports_reasoning=True, reasoning_config=rc,
+ )
+ assert profile["extra_body"]["reasoning"] == legacy["extra_body"]["reasoning"]
+
+ def test_default_reasoning(self, transport):
+ legacy = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("openrouter"), supports_reasoning=True,
+ )
+ profile = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ supports_reasoning=True,
+ )
+ assert profile["extra_body"]["reasoning"] == legacy["extra_body"]["reasoning"]
+
+
+class TestNousProfileParity:
+ def test_tags(self, transport):
+ legacy = transport.build_kwargs(
+ model="hermes-3", messages=_msgs(), tools=None, provider_profile=get_provider_profile("nous"),
+ )
+ profile = transport.build_kwargs(
+ model="hermes-3", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("nous"),
+ )
+ assert profile["extra_body"]["tags"] == legacy["extra_body"]["tags"]
+
+ def test_reasoning_omitted_when_disabled(self, transport):
+ rc = {"enabled": False}
+ legacy = transport.build_kwargs(
+ model="hermes-3", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("nous"), supports_reasoning=True, reasoning_config=rc,
+ )
+ profile = transport.build_kwargs(
+ model="hermes-3", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("nous"),
+ supports_reasoning=True, reasoning_config=rc,
+ )
+ assert "reasoning" not in legacy.get("extra_body", {})
+ assert "reasoning" not in profile.get("extra_body", {})
+
+
+class TestQwenProfileParity:
+ def test_max_tokens(self, transport):
+ legacy = transport.build_kwargs(
+ model="qwen3.5", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("qwen-oauth"), max_tokens_param_fn=_max_tokens_fn,
+ )
+ profile = transport.build_kwargs(
+ model="qwen3.5", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("qwen"),
+ max_tokens_param_fn=_max_tokens_fn,
+ )
+ assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 65536
+
+ def test_vl_high_resolution(self, transport):
+ legacy = transport.build_kwargs(
+ model="qwen3.5", messages=_msgs(), tools=None, provider_profile=get_provider_profile("qwen-oauth"),
+ )
+ profile = transport.build_kwargs(
+ model="qwen3.5", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("qwen"),
+ )
+ assert profile["extra_body"]["vl_high_resolution_images"] == legacy["extra_body"]["vl_high_resolution_images"]
+
+ def test_metadata_top_level(self, transport):
+ meta = {"sessionId": "s123", "promptId": "p456"}
+ legacy = transport.build_kwargs(
+ model="qwen3.5", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("qwen-oauth"), qwen_session_metadata=meta,
+ )
+ profile = transport.build_kwargs(
+ model="qwen3.5", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("qwen"),
+ qwen_session_metadata=meta,
+ )
+ assert profile["metadata"] == legacy["metadata"] == meta
+ assert "metadata" not in profile.get("extra_body", {})
+
+ def test_message_preprocessing(self, transport):
+ """Qwen profile normalizes string content to list-of-parts."""
+ msgs = [
+ {"role": "system", "content": "You are helpful."},
+ {"role": "user", "content": "hello"},
+ ]
+ profile = transport.build_kwargs(
+ model="qwen3.5", messages=msgs, tools=None,
+ provider_profile=get_provider_profile("qwen"),
+ )
+ out_msgs = profile["messages"]
+ # System message content normalized + cache_control injected
+ assert isinstance(out_msgs[0]["content"], list)
+ assert out_msgs[0]["content"][0]["type"] == "text"
+ assert "cache_control" in out_msgs[0]["content"][-1]
+ # User message content normalized
+ assert isinstance(out_msgs[1]["content"], list)
+ assert out_msgs[1]["content"][0] == {"type": "text", "text": "hello"}
+
+
+class TestDeveloperRoleParity:
+ """Developer role swap must work on BOTH legacy and profile paths."""
+
+ def test_legacy_path_swaps_for_gpt5(self, transport):
+ msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}]
+ kw = transport.build_kwargs(
+ model="gpt-5.4", messages=msgs, tools=None,
+ )
+ assert kw["messages"][0]["role"] == "developer"
+
+ def test_profile_path_swaps_for_gpt5(self, transport):
+ msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}]
+ kw = transport.build_kwargs(
+ model="gpt-5.4", messages=msgs, tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ )
+ assert kw["messages"][0]["role"] == "developer"
+
+ def test_profile_path_no_swap_for_claude(self, transport):
+ msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}]
+ kw = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6", messages=msgs, tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ )
+ assert kw["messages"][0]["role"] == "system"
+
+
+class TestRequestOverridesParity:
+    """request_overrides with extra_body must merge with profile-provided extra_body."""
+
+ def test_extra_body_override_legacy(self, transport):
+ kw = transport.build_kwargs(
+ model="gpt-5.4", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ request_overrides={"extra_body": {"custom_key": "custom_val"}},
+ )
+ assert kw["extra_body"]["custom_key"] == "custom_val"
+
+ def test_extra_body_override_profile(self, transport):
+ kw = transport.build_kwargs(
+ model="gpt-5.4", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ request_overrides={"extra_body": {"custom_key": "custom_val"}},
+ )
+ assert kw["extra_body"]["custom_key"] == "custom_val"
+
+ def test_extra_body_override_merges_with_provider_body(self, transport):
+ """Override extra_body merges WITH provider extra_body, not replaces."""
+ kw = transport.build_kwargs(
+ model="hermes-3", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("nous"),
+ request_overrides={"extra_body": {"custom": True}},
+ )
+ assert kw["extra_body"]["tags"] == ["product=hermes-agent"] # from profile
+ assert kw["extra_body"]["custom"] is True # from override
+
+ def test_top_level_override(self, transport):
+ kw = transport.build_kwargs(
+ model="gpt-5.4", messages=_msgs(), tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ request_overrides={"top_p": 0.9},
+ )
+ assert kw["top_p"] == 0.9
diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py
new file mode 100644
index 0000000000..3e80b0d2f2
--- /dev/null
+++ b/tests/providers/test_provider_profiles.py
@@ -0,0 +1,203 @@
+"""Tests for the provider module registry and profiles."""
+
+import pytest
+from providers import get_provider_profile, _REGISTRY
+from providers.base import ProviderProfile, OMIT_TEMPERATURE
+
+
+class TestRegistry:
+ def test_discovery_populates_registry(self):
+ p = get_provider_profile("nvidia")
+ assert p is not None
+ assert p.name == "nvidia"
+
+ def test_alias_lookup(self):
+ assert get_provider_profile("kimi").name == "kimi-coding"
+ assert get_provider_profile("moonshot").name == "kimi-coding"
+ assert get_provider_profile("kimi-coding-cn").name == "kimi-coding-cn"
+ assert get_provider_profile("or").name == "openrouter"
+ assert get_provider_profile("nous-portal").name == "nous"
+ assert get_provider_profile("qwen").name == "qwen-oauth"
+ assert get_provider_profile("qwen-portal").name == "qwen-oauth"
+
+ def test_unknown_provider_returns_none(self):
+ assert get_provider_profile("nonexistent-provider") is None
+
+ def test_all_providers_have_name(self):
+ get_provider_profile("nvidia") # trigger discovery
+ for name, profile in _REGISTRY.items():
+ assert profile.name == name
+
+
+class TestNvidiaProfile:
+ def test_max_tokens(self):
+ p = get_provider_profile("nvidia")
+ assert p.default_max_tokens == 16384
+
+ def test_no_special_temperature(self):
+ p = get_provider_profile("nvidia")
+ assert p.fixed_temperature is None
+
+ def test_base_url(self):
+ p = get_provider_profile("nvidia")
+ assert "nvidia.com" in p.base_url
+
+
+class TestKimiProfile:
+ def test_temperature_omit(self):
+ p = get_provider_profile("kimi")
+ assert p.fixed_temperature is OMIT_TEMPERATURE
+
+ def test_max_tokens(self):
+ p = get_provider_profile("kimi")
+ assert p.default_max_tokens == 32000
+
+ def test_cn_separate_profile(self):
+ p = get_provider_profile("kimi-coding-cn")
+ assert p.name == "kimi-coding-cn"
+ assert p.env_vars == ("KIMI_CN_API_KEY",)
+ assert "moonshot.cn" in p.base_url
+
+ def test_cn_not_alias_of_kimi(self):
+ kimi = get_provider_profile("kimi-coding")
+ cn = get_provider_profile("kimi-coding-cn")
+ assert kimi is not cn
+ assert kimi.base_url != cn.base_url
+
+ def test_thinking_enabled(self):
+ p = get_provider_profile("kimi")
+ eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True, "effort": "high"})
+ assert eb["thinking"] == {"type": "enabled"}
+ assert tl["reasoning_effort"] == "high"
+
+ def test_thinking_disabled(self):
+ p = get_provider_profile("kimi")
+ eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": False})
+ assert eb["thinking"] == {"type": "disabled"}
+ assert "reasoning_effort" not in tl
+
+ def test_reasoning_effort_default(self):
+ p = get_provider_profile("kimi")
+ eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True})
+ assert tl["reasoning_effort"] == "medium"
+
+ def test_no_config_defaults(self):
+ p = get_provider_profile("kimi")
+ eb, tl = p.build_api_kwargs_extras(reasoning_config=None)
+ assert eb["thinking"] == {"type": "enabled"}
+ assert tl["reasoning_effort"] == "medium"
+
+
+class TestOpenRouterProfile:
+ def test_extra_body_with_prefs(self):
+ p = get_provider_profile("openrouter")
+ body = p.build_extra_body(provider_preferences={"allow": ["anthropic"]})
+ assert body["provider"] == {"allow": ["anthropic"]}
+
+ def test_extra_body_no_prefs(self):
+ p = get_provider_profile("openrouter")
+ body = p.build_extra_body()
+ assert body == {}
+
+ def test_reasoning_full_config(self):
+ p = get_provider_profile("openrouter")
+ eb, _ = p.build_api_kwargs_extras(
+ reasoning_config={"enabled": True, "effort": "high"},
+ supports_reasoning=True,
+ )
+ assert eb["reasoning"] == {"enabled": True, "effort": "high"}
+
+ def test_reasoning_disabled_still_passes(self):
+ """OpenRouter passes disabled reasoning through (unlike Nous)."""
+ p = get_provider_profile("openrouter")
+ eb, _ = p.build_api_kwargs_extras(
+ reasoning_config={"enabled": False},
+ supports_reasoning=True,
+ )
+ assert eb["reasoning"] == {"enabled": False}
+
+ def test_default_reasoning(self):
+ p = get_provider_profile("openrouter")
+ eb, _ = p.build_api_kwargs_extras(supports_reasoning=True)
+ assert eb["reasoning"] == {"enabled": True, "effort": "medium"}
+
+
+class TestNousProfile:
+ def test_tags(self):
+ p = get_provider_profile("nous")
+ body = p.build_extra_body()
+ assert body["tags"] == ["product=hermes-agent"]
+
+ def test_auth_type(self):
+ p = get_provider_profile("nous")
+ assert p.auth_type == "oauth_device_code"
+
+ def test_reasoning_enabled(self):
+ p = get_provider_profile("nous")
+ eb, _ = p.build_api_kwargs_extras(
+ reasoning_config={"enabled": True, "effort": "medium"},
+ supports_reasoning=True,
+ )
+ assert eb["reasoning"] == {"enabled": True, "effort": "medium"}
+
+ def test_reasoning_omitted_when_disabled(self):
+ p = get_provider_profile("nous")
+ eb, _ = p.build_api_kwargs_extras(
+ reasoning_config={"enabled": False},
+ supports_reasoning=True,
+ )
+ assert "reasoning" not in eb
+
+
+class TestQwenProfile:
+ def test_max_tokens(self):
+ p = get_provider_profile("qwen-oauth")
+ assert p.default_max_tokens == 65536
+
+ def test_auth_type(self):
+ p = get_provider_profile("qwen-oauth")
+ assert p.auth_type == "oauth_external"
+
+ def test_extra_body_vl(self):
+ p = get_provider_profile("qwen-oauth")
+ body = p.build_extra_body()
+ assert body["vl_high_resolution_images"] is True
+
+ def test_prepare_messages_normalizes_content(self):
+ p = get_provider_profile("qwen-oauth")
+ msgs = [
+ {"role": "system", "content": "Be helpful"},
+ {"role": "user", "content": "hello"},
+ ]
+ result = p.prepare_messages(msgs)
+ # System message: content normalized to list, cache_control on last part
+ assert isinstance(result[0]["content"], list)
+ assert result[0]["content"][-1].get("cache_control") == {"type": "ephemeral"}
+ assert result[0]["content"][-1]["text"] == "Be helpful"
+ # User message: content normalized to list
+ assert isinstance(result[1]["content"], list)
+ assert result[1]["content"][0]["text"] == "hello"
+
+ def test_metadata_top_level(self):
+ p = get_provider_profile("qwen-oauth")
+ meta = {"sessionId": "s123", "promptId": "p456"}
+ eb, tl = p.build_api_kwargs_extras(qwen_session_metadata=meta)
+ assert tl["metadata"] == meta
+ assert "metadata" not in eb
+
+
+class TestBaseProfile:
+ def test_prepare_messages_passthrough(self):
+ p = ProviderProfile(name="test")
+ msgs = [{"role": "user", "content": "hi"}]
+ assert p.prepare_messages(msgs) is msgs
+
+ def test_build_extra_body_empty(self):
+ p = ProviderProfile(name="test")
+ assert p.build_extra_body() == {}
+
+ def test_build_api_kwargs_extras_empty(self):
+ p = ProviderProfile(name="test")
+ eb, tl = p.build_api_kwargs_extras()
+ assert eb == {}
+ assert tl == {}
diff --git a/tests/providers/test_transport_parity.py b/tests/providers/test_transport_parity.py
new file mode 100644
index 0000000000..be88bc580a
--- /dev/null
+++ b/tests/providers/test_transport_parity.py
@@ -0,0 +1,258 @@
+"""Parity tests: pin the exact current transport behavior per provider.
+
+These tests document the flag-based contract between run_agent.py and
+ChatCompletionsTransport.build_kwargs(). When the next PR wires profiles
+to replace flags, every assertion here must still pass — any failure is
+a behavioral regression.
+"""
+
+import pytest
+from agent.transports.chat_completions import ChatCompletionsTransport
+from providers import get_provider_profile
+
+
+@pytest.fixture
+def transport():
+ return ChatCompletionsTransport()
+
+
+def _simple_messages():
+ return [{"role": "user", "content": "hello"}]
+
+
+def _max_tokens_fn(n):
+ return {"max_completion_tokens": n}
+
+
+class TestNvidiaParity:
+ """NVIDIA NIM: default max_tokens=16384."""
+
+ def test_default_max_tokens(self, transport):
+ """NVIDIA default max_tokens=16384 comes from profile, not legacy is_nvidia_nim flag."""
+ from providers import get_provider_profile
+
+ profile = get_provider_profile("nvidia")
+ kw = transport.build_kwargs(
+ model="nvidia/llama-3.1-nemotron-70b-instruct",
+ messages=_simple_messages(),
+ tools=None,
+ max_tokens_param_fn=_max_tokens_fn,
+ provider_profile=profile,
+ )
+ assert kw["max_completion_tokens"] == 16384
+
+ def test_user_max_tokens_overrides(self, transport):
+ from providers import get_provider_profile
+
+ profile = get_provider_profile("nvidia")
+ kw = transport.build_kwargs(
+ model="nvidia/llama-3.1-nemotron-70b-instruct",
+ messages=_simple_messages(),
+ tools=None,
+ max_tokens=4096,
+ max_tokens_param_fn=_max_tokens_fn,
+ provider_profile=profile,
+ )
+ assert kw["max_completion_tokens"] == 4096 # user overrides default
+
+
+class TestKimiParity:
+ """Kimi: OMIT temperature, max_tokens=32000, thinking + reasoning_effort."""
+
+ def test_temperature_omitted(self, transport):
+ kw = transport.build_kwargs(
+ model="kimi-k2",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("kimi-coding"),
+ omit_temperature=True,
+ )
+ assert "temperature" not in kw
+
+ def test_default_max_tokens(self, transport):
+ kw = transport.build_kwargs(
+ model="kimi-k2",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("kimi-coding"),
+ max_tokens_param_fn=_max_tokens_fn,
+ )
+ assert kw["max_completion_tokens"] == 32000
+
+ def test_thinking_enabled(self, transport):
+ kw = transport.build_kwargs(
+ model="kimi-k2",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("kimi-coding"),
+ reasoning_config={"enabled": True, "effort": "high"},
+ )
+ assert kw["extra_body"]["thinking"] == {"type": "enabled"}
+
+ def test_thinking_disabled(self, transport):
+ kw = transport.build_kwargs(
+ model="kimi-k2",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("kimi-coding"),
+ reasoning_config={"enabled": False},
+ )
+ assert kw["extra_body"]["thinking"] == {"type": "disabled"}
+
+ def test_reasoning_effort_top_level(self, transport):
+ """Kimi reasoning_effort is a TOP-LEVEL api_kwargs key, NOT in extra_body."""
+ kw = transport.build_kwargs(
+ model="kimi-k2",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("kimi-coding"),
+ reasoning_config={"enabled": True, "effort": "high"},
+ )
+ assert kw.get("reasoning_effort") == "high"
+ assert "reasoning_effort" not in kw.get("extra_body", {})
+
+ def test_reasoning_effort_default_medium(self, transport):
+ kw = transport.build_kwargs(
+ model="kimi-k2",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("kimi-coding"),
+ reasoning_config={"enabled": True},
+ )
+ assert kw.get("reasoning_effort") == "medium"
+
+
+class TestOpenRouterParity:
+ """OpenRouter: provider preferences, reasoning in extra_body."""
+
+ def test_provider_preferences(self, transport):
+ prefs = {"allow": ["anthropic"], "sort": "price"}
+ kw = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ provider_preferences=prefs,
+ )
+ assert kw["extra_body"]["provider"] == prefs
+
+ def test_reasoning_passes_full_config(self, transport):
+ """OpenRouter passes the FULL reasoning_config dict, not just effort."""
+ rc = {"enabled": True, "effort": "high"}
+ kw = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ supports_reasoning=True,
+ reasoning_config=rc,
+ )
+ assert kw["extra_body"]["reasoning"] == rc
+
+ def test_default_reasoning_when_no_config(self, transport):
+ """When supports_reasoning=True but no config, adds default."""
+ kw = transport.build_kwargs(
+ model="anthropic/claude-sonnet-4.6",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("openrouter"),
+ supports_reasoning=True,
+ )
+ assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"}
+
+
+class TestNousParity:
+ """Nous: product tags, reasoning, omit when disabled."""
+
+ def test_tags(self, transport):
+ kw = transport.build_kwargs(
+ model="hermes-3-llama-3.1-405b",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("nous"),
+ )
+ assert kw["extra_body"]["tags"] == ["product=hermes-agent"]
+
+ def test_reasoning_omitted_when_disabled(self, transport):
+ """Nous special case: reasoning omitted entirely when disabled."""
+ kw = transport.build_kwargs(
+ model="hermes-3-llama-3.1-405b",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("nous"),
+ supports_reasoning=True,
+ reasoning_config={"enabled": False},
+ )
+ assert "reasoning" not in kw.get("extra_body", {})
+
+ def test_reasoning_enabled(self, transport):
+ rc = {"enabled": True, "effort": "high"}
+ kw = transport.build_kwargs(
+ model="hermes-3-llama-3.1-405b",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("nous"),
+ supports_reasoning=True,
+ reasoning_config=rc,
+ )
+ assert kw["extra_body"]["reasoning"] == rc
+
+
+class TestQwenParity:
+ """Qwen: max_tokens=65536, vl_high_resolution, metadata top-level."""
+
+ def test_default_max_tokens(self, transport):
+ kw = transport.build_kwargs(
+ model="qwen3.5-plus",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("qwen-oauth"),
+ max_tokens_param_fn=_max_tokens_fn,
+ )
+ assert kw["max_completion_tokens"] == 65536
+
+ def test_vl_high_resolution(self, transport):
+ kw = transport.build_kwargs(
+ model="qwen3.5-plus",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("qwen-oauth"),
+ )
+ assert kw["extra_body"]["vl_high_resolution_images"] is True
+
+ def test_metadata_top_level(self, transport):
+ """Qwen metadata goes to top-level api_kwargs, NOT extra_body."""
+ meta = {"sessionId": "s123", "promptId": "p456"}
+ kw = transport.build_kwargs(
+ model="qwen3.5-plus",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("qwen-oauth"),
+ qwen_session_metadata=meta,
+ )
+ assert kw["metadata"] == meta
+ assert "metadata" not in kw.get("extra_body", {})
+
+
+class TestCustomOllamaParity:
+ """Custom/Ollama: num_ctx, think=false — now tested via profile."""
+
+ def test_ollama_num_ctx(self, transport):
+ kw = transport.build_kwargs(
+ model="llama3.1",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("custom"),
+ ollama_num_ctx=131072,
+ )
+ assert kw["extra_body"]["options"]["num_ctx"] == 131072
+
+ def test_think_false_when_disabled(self, transport):
+ kw = transport.build_kwargs(
+ model="qwen3:72b",
+ messages=_simple_messages(),
+ tools=None,
+ provider_profile=get_provider_profile("custom"),
+ reasoning_config={"enabled": False, "effort": "none"},
+ )
+ assert kw["extra_body"]["think"] is False
diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py
index eb2b47f87a..c3f91f07f7 100644
--- a/tests/run_agent/test_run_agent.py
+++ b/tests/run_agent/test_run_agent.py
@@ -1097,6 +1097,7 @@ class TestBuildApiKwargs:
assert "temperature" not in kwargs
def test_kimi_coding_endpoint_omits_temperature(self, agent):
+ agent.provider = "kimi-coding"
agent.base_url = "https://api.kimi.com/coding/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-k2.5"
@@ -1109,6 +1110,7 @@ class TestBuildApiKwargs:
def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent):
"""Kimi endpoint should send max_tokens=32000 and reasoning_effort as
top-level params, matching Kimi CLI's default behavior."""
+ agent.provider = "kimi-coding"
agent.base_url = "https://api.kimi.com/coding/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-for-coding"
@@ -1121,6 +1123,7 @@ class TestBuildApiKwargs:
def test_kimi_coding_endpoint_respects_custom_effort(self, agent):
"""reasoning_effort should reflect reasoning_config.effort when set."""
+ agent.provider = "kimi-coding"
agent.base_url = "https://api.kimi.com/coding/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-for-coding"
@@ -1134,6 +1137,7 @@ class TestBuildApiKwargs:
def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent):
"""Kimi endpoint should send extra_body.thinking={"type":"enabled"}
to activate reasoning mode, mirroring Kimi CLI's with_thinking()."""
+ agent.provider = "kimi-coding"
agent.base_url = "https://api.kimi.com/coding/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-for-coding"
@@ -1147,6 +1151,7 @@ class TestBuildApiKwargs:
"""When reasoning_config.enabled=False, thinking should be disabled
and reasoning_effort should be omitted entirely — mirroring Kimi
CLI's with_thinking("off") which maps to reasoning_effort=None."""
+ agent.provider = "kimi-coding"
agent.base_url = "https://api.kimi.com/coding/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-for-coding"
@@ -1160,6 +1165,7 @@ class TestBuildApiKwargs:
def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent):
"""api.moonshot.ai should get the same Kimi-compatible params."""
+ agent.provider = "kimi-coding"
agent.base_url = "https://api.moonshot.ai/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-k2.5"
@@ -1173,6 +1179,7 @@ class TestBuildApiKwargs:
def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent):
"""api.moonshot.cn (China endpoint) should get the same params."""
+ agent.provider = "kimi-coding-cn"
agent.base_url = "https://api.moonshot.cn/v1"
agent._base_url_lower = agent.base_url.lower()
agent.model = "kimi-k2.5"
@@ -1185,6 +1192,7 @@ class TestBuildApiKwargs:
assert kwargs["extra_body"]["thinking"] == {"type": "enabled"}
def test_provider_preferences_injected(self, agent):
+ agent.provider = "openrouter"
agent.base_url = "https://openrouter.ai/api/v1"
agent.providers_allowed = ["Anthropic"]
messages = [{"role": "user", "content": "hi"}]
@@ -1193,6 +1201,7 @@ class TestBuildApiKwargs:
def test_reasoning_config_default_openrouter(self, agent):
"""Default reasoning config for OpenRouter should be medium."""
+ agent.provider = "openrouter"
agent.base_url = "https://openrouter.ai/api/v1"
agent.model = "anthropic/claude-sonnet-4-20250514"
messages = [{"role": "user", "content": "hi"}]
@@ -1202,6 +1211,7 @@ class TestBuildApiKwargs:
assert reasoning["effort"] == "medium"
def test_reasoning_config_custom(self, agent):
+ agent.provider = "openrouter"
agent.base_url = "https://openrouter.ai/api/v1"
agent.model = "anthropic/claude-sonnet-4-20250514"
agent.reasoning_config = {"enabled": False}
@@ -1217,6 +1227,7 @@ class TestBuildApiKwargs:
assert "reasoning" not in kwargs.get("extra_body", {})
def test_reasoning_sent_for_supported_openrouter_model(self, agent):
+ agent.provider = "openrouter"
agent.base_url = "https://openrouter.ai/api/v1"
agent.model = "qwen/qwen3.5-plus-02-15"
messages = [{"role": "user", "content": "hi"}]
@@ -1224,6 +1235,7 @@ class TestBuildApiKwargs:
assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
def test_reasoning_sent_for_nous_route(self, agent):
+ agent.provider = "nous"
agent.base_url = "https://inference-api.nousresearch.com/v1"
agent.model = "minimax/minimax-m2.5"
messages = [{"role": "user", "content": "hi"}]
@@ -1231,18 +1243,38 @@ class TestBuildApiKwargs:
assert kwargs["extra_body"]["reasoning"]["effort"] == "medium"
def test_reasoning_sent_for_copilot_gpt5(self, agent):
- agent.base_url = "https://api.githubcopilot.com"
- agent.model = "gpt-5.4"
- messages = [{"role": "user", "content": "hi"}]
- kwargs = agent._build_api_kwargs(messages)
+ """Copilot/GitHub Models: GPT-5 reasoning goes in extra_body.reasoning."""
+ from agent.transports import get_transport
+ from providers import get_provider_profile
+
+ transport = get_transport("chat_completions")
+ profile = get_provider_profile("copilot")
+ msgs = [{"role": "user", "content": "hi"}]
+ kwargs = transport.build_kwargs(
+ model="gpt-5.4",
+ messages=msgs,
+ tools=None,
+ supports_reasoning=True,
+ provider_profile=profile,
+ )
assert kwargs["extra_body"]["reasoning"] == {"effort": "medium"}
def test_reasoning_xhigh_normalized_for_copilot(self, agent):
- agent.base_url = "https://api.githubcopilot.com"
- agent.model = "gpt-5.4"
- agent.reasoning_config = {"enabled": True, "effort": "xhigh"}
- messages = [{"role": "user", "content": "hi"}]
- kwargs = agent._build_api_kwargs(messages)
+ """xhigh effort should normalize to high for Copilot GitHub Models."""
+ from agent.transports import get_transport
+ from providers import get_provider_profile
+
+ transport = get_transport("chat_completions")
+ profile = get_provider_profile("copilot")
+ msgs = [{"role": "user", "content": "hi"}]
+ kwargs = transport.build_kwargs(
+ model="gpt-5.4",
+ messages=msgs,
+ tools=None,
+ supports_reasoning=True,
+ reasoning_config={"enabled": True, "effort": "xhigh"},
+ provider_profile=profile,
+ )
assert kwargs["extra_body"]["reasoning"] == {"effort": "high"}
def test_reasoning_omitted_for_non_reasoning_copilot_model(self, agent):
@@ -1260,6 +1292,7 @@ class TestBuildApiKwargs:
def test_qwen_portal_formats_messages_and_metadata(self, agent):
+ agent.provider = "qwen-oauth"
agent.base_url = "https://portal.qwen.ai/v1"
agent._base_url_lower = agent.base_url.lower()
agent.session_id = "sess-123"
@@ -1276,6 +1309,7 @@ class TestBuildApiKwargs:
assert kwargs["messages"][2]["content"][0]["text"] == "hi"
def test_qwen_portal_normalizes_bare_string_content_parts(self, agent):
+ agent.provider = "qwen-oauth"
agent.base_url = "https://portal.qwen.ai/v1"
agent._base_url_lower = agent.base_url.lower()
messages = [
@@ -1288,6 +1322,7 @@ class TestBuildApiKwargs:
assert user_content[1] == {"type": "text", "text": "world"}
def test_qwen_portal_no_system_message(self, agent):
+ agent.provider = "qwen-oauth"
agent.base_url = "https://portal.qwen.ai/v1"
agent._base_url_lower = agent.base_url.lower()
messages = [{"role": "user", "content": "hi"}]
@@ -1308,6 +1343,7 @@ class TestBuildApiKwargs:
def test_qwen_portal_default_max_tokens(self, agent):
"""When max_tokens is None, Qwen Portal gets a default of 65536
to prevent reasoning models from exhausting their output budget."""
+ agent.provider = "qwen-oauth"
agent.base_url = "https://portal.qwen.ai/v1"
agent._base_url_lower = agent.base_url.lower()
agent.max_tokens = None
@@ -3843,7 +3879,7 @@ def test_aiagent_uses_copilot_acp_client():
patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")),
patch("run_agent.check_toolset_requirements", return_value={}),
patch("run_agent.OpenAI") as mock_openai,
- patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp_client,
+ patch("acp_adapter.copilot_client.CopilotACPClient") as mock_acp_client,
):
acp_client = MagicMock()
mock_acp_client.return_value = acp_client
diff --git a/uv.lock b/uv.lock
index dfb2f786b0..8ffbd3050e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -9,7 +9,7 @@ resolution-markers = [
]
[options]
-exclude-newer = "2026-04-17T16:49:45.944715922Z"
+exclude-newer = "2026-04-19T17:00:07.266826Z"
exclude-newer-span = "P7D"
[[package]]
@@ -564,30 +564,30 @@ wheels = [
[[package]]
name = "boto3"
-version = "1.42.89"
+version = "1.42.91"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "botocore" },
{ name = "jmespath" },
{ name = "s3transfer" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/bb/0c/f7bccb22b245cabf392816baba20f9e95f78ace7dbc580fd40136e80e732/boto3-1.42.89.tar.gz", hash = "sha256:3e43aacc0801bba9bcd23a8c271c089af297a69565f783fcdd357ae0e330bf1e", size = 113165, upload-time = "2026-04-13T19:36:17.516Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/a7/c0/98b8cec7ca22dde776df48c58940ae1abc425593959b7226e270760d726f/boto3-1.42.91.tar.gz", hash = "sha256:03d70532b17f7f84df37ca7e8c21553280454dea53ae12b15d1cfef9b16fcb8a", size = 113181, upload-time = "2026-04-17T19:31:06.251Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/b9/33/55103ba5ef9975ea54b8d39e69b76eb6e9fded3beae5f01065e26951a3a1/boto3-1.42.89-py3-none-any.whl", hash = "sha256:6204b189f4d0c655535f43d7eaa57ff4e8d965b8463c97e45952291211162932", size = 140556, upload-time = "2026-04-13T19:36:13.894Z" },
+ { url = "https://files.pythonhosted.org/packages/02/29/faba6521257c34085cc9b439ef98235b581772580f417fa3629728007270/boto3-1.42.91-py3-none-any.whl", hash = "sha256:04e72071cde022951ce7f81bd9933c90095ab8923e8ced61c8dacfe9edac0f5c", size = 140553, upload-time = "2026-04-17T19:31:02.57Z" },
]
[[package]]
name = "botocore"
-version = "1.42.89"
+version = "1.42.91"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "jmespath" },
{ name = "python-dateutil" },
{ name = "urllib3" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/0f/cc/e6be943efa9051bd15c2ee14077c2b10d6e27c9e9385fc43a03a5c4ed8b5/botocore-1.42.89.tar.gz", hash = "sha256:95ac52f472dad29942f3088b278ab493044516c16dbf9133c975af16527baa99", size = 15206290, upload-time = "2026-04-13T19:36:02.321Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/21/bc/a4b7c46471c2e789ad8c4c7acfd7f302fdb481d93ff870f441249b924ae6/botocore-1.42.91.tar.gz", hash = "sha256:d252e27bc454afdbf5ed3dc617aa423f2c855c081e98b7963093399483ecc698", size = 15213010, upload-time = "2026-04-17T19:30:50.793Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/91/f1/90a7b8eda38b7c3a65ca7ee0075bdf310b6b471cb1b95fab6e8994323a50/botocore-1.42.89-py3-none-any.whl", hash = "sha256:d9b786c8d9db6473063b4cc5be0ba7e6a381082307bd6afb69d4216f9fa95f35", size = 14887287, upload-time = "2026-04-13T19:35:56.677Z" },
+ { url = "https://files.pythonhosted.org/packages/b1/fc/24cc0a47c824f13933e210e9ad034b4fba22f7185b8d904c0fbf5a3b2be8/botocore-1.42.91-py3-none-any.whl", hash = "sha256:7a28c3cc6bfab5724ad18899d52402b776a0de7d87fa20c3c5270bcaaf199ce8", size = 14897344, upload-time = "2026-04-17T19:30:44.245Z" },
]
[[package]]
@@ -1759,6 +1759,77 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" },
]
+[[package]]
+name = "google-api-core"
+version = "2.30.3"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "google-auth" },
+ { name = "googleapis-common-protos" },
+ { name = "proto-plus" },
+ { name = "protobuf" },
+ { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/16/ce/502a57fb0ec752026d24df1280b162294b22a0afb98a326084f9a979138b/google_api_core-2.30.3.tar.gz", hash = "sha256:e601a37f148585319b26db36e219df68c5d07b6382cff2d580e83404e44d641b", size = 177001, upload-time = "2026-04-10T00:41:28.035Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/03/15/e56f351cf6ef1cfea58e6ac226a7318ed1deb2218c4b3cc9bd9e4b786c5a/google_api_core-2.30.3-py3-none-any.whl", hash = "sha256:a85761ba72c444dad5d611c2220633480b2b6be2521eca69cca2dbb3ffd6bfe8", size = 173274, upload-time = "2026-04-09T22:57:16.198Z" },
+]
+
+[[package]]
+name = "google-api-python-client"
+version = "2.194.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "google-api-core" },
+ { name = "google-auth" },
+ { name = "google-auth-httplib2" },
+ { name = "httplib2" },
+ { name = "uritemplate" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/60/ab/e83af0eb043e4ccc49571ca7a6a49984e9d00f4e9e6e6f1238d60bc84dce/google_api_python_client-2.194.0.tar.gz", hash = "sha256:db92647bd1a90f40b79c9618461553c2b20b6a43ce7395fa6de07132dc14f023", size = 14443469, upload-time = "2026-04-08T23:07:35.757Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/b0/34/5a624e49f179aa5b0cb87b2ce8093960299030ff40423bfbde09360eb908/google_api_python_client-2.194.0-py3-none-any.whl", hash = "sha256:61eaaac3b8fc8fdf11c08af87abc3d1342d1b37319cc1b57405f86ef7697e717", size = 15016514, upload-time = "2026-04-08T23:07:33.093Z" },
+]
+
+[[package]]
+name = "google-auth"
+version = "2.49.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cryptography" },
+ { name = "pyasn1-modules" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c6/fc/e925290a1ad95c975c459e2df070fac2b90954e13a0370ac505dff78cb99/google_auth-2.49.2.tar.gz", hash = "sha256:c1ae38500e73065dcae57355adb6278cf8b5c8e391994ae9cbadbcb9631ab409", size = 333958, upload-time = "2026-04-10T00:41:21.888Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/73/76/d241a5c927433420507215df6cac1b1fa4ac0ba7a794df42a84326c68da8/google_auth-2.49.2-py3-none-any.whl", hash = "sha256:c2720924dfc82dedb962c9f52cabb2ab16714fd0a6a707e40561d217574ed6d5", size = 240638, upload-time = "2026-04-10T00:41:14.501Z" },
+]
+
+[[package]]
+name = "google-auth-httplib2"
+version = "0.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "google-auth" },
+ { name = "httplib2" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/ed/99/107612bef8d24b298bb5a7c8466f908ecda791d43f9466f5c3978f5b24c1/google_auth_httplib2-0.3.1.tar.gz", hash = "sha256:0af542e815784cb64159b4469aa5d71dd41069ba93effa006e1916b1dcd88e55", size = 11152, upload-time = "2026-03-30T22:50:26.766Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/97/e9/93afb14d23a949acaa3f4e7cc51a0024671174e116e35f42850764b99634/google_auth_httplib2-0.3.1-py3-none-any.whl", hash = "sha256:682356a90ef4ba3d06548c37e9112eea6fc00395a11b0303a644c1a86abc275c", size = 9534, upload-time = "2026-03-30T22:49:03.384Z" },
+]
+
+[[package]]
+name = "google-auth-oauthlib"
+version = "1.3.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "google-auth" },
+ { name = "requests-oauthlib" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/a6/82/62482931dcbe5266a2680d0da17096f2aab983ecb320277d9556700ce00e/google_auth_oauthlib-1.3.1.tar.gz", hash = "sha256:14c22c7b3dd3d06dbe44264144409039465effdd1eef94f7ce3710e486cc4bfa", size = 21663, upload-time = "2026-03-30T22:49:56.408Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2a/e0/cb454a95f460903e39f101e950038ec24a072ca69d0a294a6df625cc1627/google_auth_oauthlib-1.3.1-py3-none-any.whl", hash = "sha256:1a139ef23f1318756805b0e95f655c238bffd29655329a2978218248da4ee7f8", size = 19247, upload-time = "2026-03-30T20:02:23.894Z" },
+]
+
[[package]]
name = "googleapis-common-protos"
version = "1.73.0"
@@ -1912,6 +1983,9 @@ all = [
{ name = "elevenlabs" },
{ name = "fastapi" },
{ name = "faster-whisper" },
+ { name = "google-api-python-client" },
+ { name = "google-auth-httplib2" },
+ { name = "google-auth-oauthlib" },
{ name = "honcho-ai" },
{ name = "lark-oapi" },
{ name = "markdown", marker = "sys_platform == 'linux'" },
@@ -1965,6 +2039,11 @@ feishu = [
{ name = "lark-oapi" },
{ name = "qrcode" },
]
+google = [
+ { name = "google-api-python-client" },
+ { name = "google-auth-httplib2" },
+ { name = "google-auth-oauthlib" },
+]
homeassistant = [
{ name = "aiohttp" },
]
@@ -2064,6 +2143,9 @@ requires-dist = [
{ name = "faster-whisper", marker = "extra == 'voice'", specifier = ">=1.0.0,<2" },
{ name = "fire", specifier = ">=0.7.1,<1" },
{ name = "firecrawl-py", specifier = ">=4.16.0,<5" },
+ { name = "google-api-python-client", marker = "extra == 'google'", specifier = ">=2.100,<3" },
+ { name = "google-auth-httplib2", marker = "extra == 'google'", specifier = ">=0.2,<1" },
+ { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = ">=1.0,<2" },
{ name = "hermes-agent", extras = ["acp"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["acp"], marker = "extra == 'termux'" },
{ name = "hermes-agent", extras = ["bedrock"], marker = "extra == 'all'" },
@@ -2075,6 +2157,7 @@ requires-dist = [
{ name = "hermes-agent", extras = ["dev"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["dingtalk"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["feishu"], marker = "extra == 'all'" },
+ { name = "hermes-agent", extras = ["google"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["homeassistant"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["honcho"], marker = "extra == 'all'" },
{ name = "hermes-agent", extras = ["honcho"], marker = "extra == 'termux'" },
@@ -2136,7 +2219,7 @@ requires-dist = [
{ name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" },
{ name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c" },
]
-provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "bedrock", "termux", "dingtalk", "feishu", "web", "rl", "yc-bench", "all"]
+provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "bedrock", "termux", "dingtalk", "feishu", "google", "web", "rl", "yc-bench", "all"]
[[package]]
name = "hf-transfer"
@@ -2238,6 +2321,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" },
]
+[[package]]
+name = "httplib2"
+version = "0.31.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pyparsing" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/c1/1f/e86365613582c027dda5ddb64e1010e57a3d53e99ab8a72093fa13d565ec/httplib2-0.31.2.tar.gz", hash = "sha256:385e0869d7397484f4eab426197a4c020b606edd43372492337c0b4010ae5d24", size = 250800, upload-time = "2026-01-23T11:04:44.165Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/2f/90/fd509079dfcab01102c0fdd87f3a9506894bc70afcf9e9785ef6b2b3aff6/httplib2-0.31.2-py3-none-any.whl", hash = "sha256:dbf0c2fa3862acf3c55c078ea9c0bc4481d7dc5117cae71be9514912cf9f8349", size = 91099, upload-time = "2026-01-23T11:04:42.78Z" },
+]
+
[[package]]
name = "httptools"
version = "0.7.1"
@@ -3277,6 +3372,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/57/a7/b35835e278c18b85206834b3aa3abe68e77a98769c59233d1f6300284781/numpy-2.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4b42639cdde6d24e732ff823a3fa5b701d8acad89c4142bc1d0bd6dc85200ba5", size = 12504685, upload-time = "2026-03-09T07:58:50.525Z" },
]
+[[package]]
+name = "oauthlib"
+version = "3.3.1"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918, upload-time = "2025-06-19T22:48:08.269Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" },
+]
+
[[package]]
name = "obstore"
version = "0.8.2"
@@ -3855,6 +3959,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" },
]
+[[package]]
+name = "proto-plus"
+version = "1.27.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "protobuf" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/81/0d/94dfe80193e79d55258345901acd2917523d56e8381bc4dee7fd38e3868a/proto_plus-1.27.2.tar.gz", hash = "sha256:b2adde53adadf75737c44d3dcb0104fde65250dfc83ad59168b4aa3e574b6a24", size = 57204, upload-time = "2026-03-26T22:18:57.174Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/84/f3/1fba73eeffafc998a25d59703b63f8be4fe8a5cb12eaff7386a0ba0f7125/proto_plus-1.27.2-py3-none-any.whl", hash = "sha256:6432f75893d3b9e70b9c412f1d2f03f65b11fb164b793d14ae2ca01821d22718", size = 50450, upload-time = "2026-03-26T22:13:42.927Z" },
+]
+
[[package]]
name = "protobuf"
version = "6.33.5"
@@ -3929,6 +4045,27 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/50/f2/c0e76a0b451ffdf0cf788932e182758eb7558953f4f27f1aff8e2518b653/pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce", size = 28365807, upload-time = "2026-02-16T10:14:03.892Z" },
]
+[[package]]
+name = "pyasn1"
+version = "0.6.3"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" },
+]
+
+[[package]]
+name = "pyasn1-modules"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "pyasn1" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" },
+]
+
[[package]]
name = "pycparser"
version = "3.0"
@@ -4529,6 +4666,19 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/56/5d/c814546c2333ceea4ba42262d8c4d55763003e767fa169adc693bd524478/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b", size = 65017, upload-time = "2026-03-25T15:10:40.382Z" },
]
+[[package]]
+name = "requests-oauthlib"
+version = "2.0.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "oauthlib" },
+ { name = "requests" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload-time = "2024-03-22T20:32:29.939Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" },
+]
+
[[package]]
name = "requests-toolbelt"
version = "1.0.0"
@@ -4664,27 +4814,27 @@ wheels = [
[[package]]
name = "ruff"
-version = "0.15.10"
+version = "0.15.11"
source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/e7/d9/aa3f7d59a10ef6b14fe3431706f854dbf03c5976be614a9796d36326810c/ruff-0.15.10.tar.gz", hash = "sha256:d1f86e67ebfdef88e00faefa1552b5e510e1d35f3be7d423dc7e84e63788c94e", size = 4631728, upload-time = "2026-04-09T14:06:09.884Z" }
+sdist = { url = "https://files.pythonhosted.org/packages/e4/8d/192f3d7103816158dfd5ea50d098ef2aec19194e6cbccd4b3485bdb2eb2d/ruff-0.15.11.tar.gz", hash = "sha256:f092b21708bf0e7437ce9ada249dfe688ff9a0954fc94abab05dcea7dcd29c33", size = 4637264, upload-time = "2026-04-16T18:46:26.58Z" }
wheels = [
- { url = "https://files.pythonhosted.org/packages/eb/00/a1c2fdc9939b2c03691edbda290afcd297f1f389196172826b03d6b6a595/ruff-0.15.10-py3-none-linux_armv6l.whl", hash = "sha256:0744e31482f8f7d0d10a11fcbf897af272fefdfcb10f5af907b18c2813ff4d5f", size = 10563362, upload-time = "2026-04-09T14:06:21.189Z" },
- { url = "https://files.pythonhosted.org/packages/5c/15/006990029aea0bebe9d33c73c3e28c80c391ebdba408d1b08496f00d422d/ruff-0.15.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b1e7c16ea0ff5a53b7c2df52d947e685973049be1cdfe2b59a9c43601897b22e", size = 10951122, upload-time = "2026-04-09T14:06:02.236Z" },
- { url = "https://files.pythonhosted.org/packages/f2/c0/4ac978fe874d0618c7da647862afe697b281c2806f13ce904ad652fa87e4/ruff-0.15.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:93cc06a19e5155b4441dd72808fdf84290d84ad8a39ca3b0f994363ade4cebb1", size = 10314005, upload-time = "2026-04-09T14:06:00.026Z" },
- { url = "https://files.pythonhosted.org/packages/da/73/c209138a5c98c0d321266372fc4e33ad43d506d7e5dd817dd89b60a8548f/ruff-0.15.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83e1dd04312997c99ea6965df66a14fb4f03ba978564574ffc68b0d61fd3989e", size = 10643450, upload-time = "2026-04-09T14:05:42.137Z" },
- { url = "https://files.pythonhosted.org/packages/ec/76/0deec355d8ec10709653635b1f90856735302cb8e149acfdf6f82a5feb70/ruff-0.15.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8154d43684e4333360fedd11aaa40b1b08a4e37d8ffa9d95fee6fa5b37b6fab1", size = 10379597, upload-time = "2026-04-09T14:05:49.984Z" },
- { url = "https://files.pythonhosted.org/packages/dc/be/86bba8fc8798c081e28a4b3bb6d143ccad3fd5f6f024f02002b8f08a9fa3/ruff-0.15.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ab88715f3a6deb6bde6c227f3a123410bec7b855c3ae331b4c006189e895cef", size = 11146645, upload-time = "2026-04-09T14:06:12.246Z" },
- { url = "https://files.pythonhosted.org/packages/a8/89/140025e65911b281c57be1d385ba1d932c2366ca88ae6663685aed8d4881/ruff-0.15.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a768ff5969b4f44c349d48edf4ab4f91eddb27fd9d77799598e130fb628aa158", size = 12030289, upload-time = "2026-04-09T14:06:04.776Z" },
- { url = "https://files.pythonhosted.org/packages/88/de/ddacca9545a5e01332567db01d44bd8cf725f2db3b3d61a80550b48308ea/ruff-0.15.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ee3ef42dab7078bda5ff6a1bcba8539e9857deb447132ad5566a038674540d0", size = 11496266, upload-time = "2026-04-09T14:05:55.485Z" },
- { url = "https://files.pythonhosted.org/packages/bc/bb/7ddb00a83760ff4a83c4e2fc231fd63937cc7317c10c82f583302e0f6586/ruff-0.15.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51cb8cc943e891ba99989dd92d61e29b1d231e14811db9be6440ecf25d5c1609", size = 11256418, upload-time = "2026-04-09T14:05:57.69Z" },
- { url = "https://files.pythonhosted.org/packages/dc/8d/55de0d35aacf6cd50b6ee91ee0f291672080021896543776f4170fc5c454/ruff-0.15.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:e59c9bdc056a320fb9ea1700a8d591718b8faf78af065484e801258d3a76bc3f", size = 11288416, upload-time = "2026-04-09T14:05:44.695Z" },
- { url = "https://files.pythonhosted.org/packages/68/cf/9438b1a27426ec46a80e0a718093c7f958ef72f43eb3111862949ead3cc1/ruff-0.15.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:136c00ca2f47b0018b073f28cb5c1506642a830ea941a60354b0e8bc8076b151", size = 10621053, upload-time = "2026-04-09T14:05:52.782Z" },
- { url = "https://files.pythonhosted.org/packages/4c/50/e29be6e2c135e9cd4cb15fbade49d6a2717e009dff3766dd080fcb82e251/ruff-0.15.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8b80a2f3c9c8a950d6237f2ca12b206bccff626139be9fa005f14feb881a1ae8", size = 10378302, upload-time = "2026-04-09T14:06:14.361Z" },
- { url = "https://files.pythonhosted.org/packages/18/2f/e0b36a6f99c51bb89f3a30239bc7bf97e87a37ae80aa2d6542d6e5150364/ruff-0.15.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:e3e53c588164dc025b671c9df2462429d60357ea91af7e92e9d56c565a9f1b07", size = 10850074, upload-time = "2026-04-09T14:06:16.581Z" },
- { url = "https://files.pythonhosted.org/packages/11/08/874da392558ce087a0f9b709dc6ec0d60cbc694c1c772dab8d5f31efe8cb/ruff-0.15.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b0c52744cf9f143a393e284125d2576140b68264a93c6716464e129a3e9adb48", size = 11358051, upload-time = "2026-04-09T14:06:18.948Z" },
- { url = "https://files.pythonhosted.org/packages/e4/46/602938f030adfa043e67112b73821024dc79f3ab4df5474c25fa4c1d2d14/ruff-0.15.10-py3-none-win32.whl", hash = "sha256:d4272e87e801e9a27a2e8df7b21011c909d9ddd82f4f3281d269b6ba19789ca5", size = 10588964, upload-time = "2026-04-09T14:06:07.14Z" },
- { url = "https://files.pythonhosted.org/packages/25/b6/261225b875d7a13b33a6d02508c39c28450b2041bb01d0f7f1a83d569512/ruff-0.15.10-py3-none-win_amd64.whl", hash = "sha256:28cb32d53203242d403d819fd6983152489b12e4a3ae44993543d6fe62ab42ed", size = 11745044, upload-time = "2026-04-09T14:05:39.473Z" },
- { url = "https://files.pythonhosted.org/packages/58/ed/dea90a65b7d9e69888890fb14c90d7f51bf0c1e82ad800aeb0160e4bacfd/ruff-0.15.10-py3-none-win_arm64.whl", hash = "sha256:601d1610a9e1f1c2165a4f561eeaa2e2ea1e97f3287c5aa258d3dab8b57c6188", size = 11035607, upload-time = "2026-04-09T14:05:47.593Z" },
+ { url = "https://files.pythonhosted.org/packages/02/1e/6aca3427f751295ab011828e15e9bf452200ac74484f1db4be0197b8170b/ruff-0.15.11-py3-none-linux_armv6l.whl", hash = "sha256:e927cfff503135c558eb581a0c9792264aae9507904eb27809cdcff2f2c847b7", size = 10607943, upload-time = "2026-04-16T18:46:05.967Z" },
+ { url = "https://files.pythonhosted.org/packages/e7/26/1341c262e74f36d4e84f3d6f4df0ac68cd53331a66bfc5080daa17c84c0b/ruff-0.15.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7a1b5b2938d8f890b76084d4fa843604d787a912541eae85fd7e233398bbb73e", size = 10988592, upload-time = "2026-04-16T18:46:00.742Z" },
+ { url = "https://files.pythonhosted.org/packages/03/71/850b1d6ffa9564fbb6740429bad53df1094082fe515c8c1e74b6d8d05f18/ruff-0.15.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d4176f3d194afbdaee6e41b9ccb1a2c287dba8700047df474abfbe773825d1cb", size = 10338501, upload-time = "2026-04-16T18:46:03.723Z" },
+ { url = "https://files.pythonhosted.org/packages/f2/11/cc1284d3e298c45a817a6aadb6c3e1d70b45c9b36d8d9cce3387b495a03a/ruff-0.15.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b17c886fb88203ced3afe7f14e8d5ae96e9d2f4ccc0ee66aa19f2c2675a27e4", size = 10670693, upload-time = "2026-04-16T18:46:41.941Z" },
+ { url = "https://files.pythonhosted.org/packages/ce/9e/f8288b034ab72b371513c13f9a41d9ba3effac54e24bfb467b007daee2ca/ruff-0.15.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:49fafa220220afe7758a487b048de4c8f9f767f37dfefad46b9dd06759d003eb", size = 10416177, upload-time = "2026-04-16T18:46:21.717Z" },
+ { url = "https://files.pythonhosted.org/packages/85/71/504d79abfd3d92532ba6bbe3d1c19fada03e494332a59e37c7c2dabae427/ruff-0.15.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2ab8427e74a00d93b8bda1307b1e60970d40f304af38bccb218e056c220120d", size = 11221886, upload-time = "2026-04-16T18:46:15.086Z" },
+ { url = "https://files.pythonhosted.org/packages/43/5a/947e6ab7a5ad603d65b474be15a4cbc6d29832db5d762cd142e4e3a74164/ruff-0.15.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:195072c0c8e1fc8f940652073df082e37a5d9cb43b4ab1e4d0566ab8977a13b7", size = 12075183, upload-time = "2026-04-16T18:46:07.944Z" },
+ { url = "https://files.pythonhosted.org/packages/9f/a1/0b7bb6268775fdd3a0818aee8efd8f5b4e231d24dd4d528ced2534023182/ruff-0.15.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3a0996d486af3920dec930a2e7daed4847dfc12649b537a9335585ada163e9e", size = 11516575, upload-time = "2026-04-16T18:46:31.687Z" },
+ { url = "https://files.pythonhosted.org/packages/30/c3/bb5168fc4d233cc06e95f482770d0f3c87945a0cd9f614b90ea8dc2f2833/ruff-0.15.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bef2cb556d509259f1fe440bb9cd33c756222cf0a7afe90d15edf0866702431", size = 11306537, upload-time = "2026-04-16T18:46:36.988Z" },
+ { url = "https://files.pythonhosted.org/packages/e4/92/4cfae6441f3967317946f3b788136eecf093729b94d6561f963ed810c82e/ruff-0.15.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:030d921a836d7d4a12cf6e8d984a88b66094ccb0e0f17ddd55067c331191bf19", size = 11296813, upload-time = "2026-04-16T18:46:24.182Z" },
+ { url = "https://files.pythonhosted.org/packages/43/26/972784c5dde8313acde8ac71ba8ac65475b85db4a2352a76c9934361f9bc/ruff-0.15.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0e783b599b4577788dbbb66b9addcef87e9a8832f4ce0c19e34bf55543a2f890", size = 10633136, upload-time = "2026-04-16T18:46:39.802Z" },
+ { url = "https://files.pythonhosted.org/packages/5b/53/3985a4f185020c2f367f2e08a103032e12564829742a1b417980ce1514a0/ruff-0.15.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ae90592246625ba4a34349d68ec28d4400d75182b71baa196ddb9f82db025ef5", size = 10424701, upload-time = "2026-04-16T18:46:10.381Z" },
+ { url = "https://files.pythonhosted.org/packages/d3/57/bf0dfb32241b56c83bb663a826133da4bf17f682ba8c096973065f6e6a68/ruff-0.15.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1f111d62e3c983ed20e0ca2e800f8d77433a5b1161947df99a5c2a3fb60514f0", size = 10873887, upload-time = "2026-04-16T18:46:29.157Z" },
+ { url = "https://files.pythonhosted.org/packages/02/05/e48076b2a57dc33ee8c7a957296f97c744ca891a8ffb4ffb1aaa3b3f517d/ruff-0.15.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:06f483d6646f59eaffba9ae30956370d3a886625f511a3108994000480621d1c", size = 11404316, upload-time = "2026-04-16T18:46:19.462Z" },
+ { url = "https://files.pythonhosted.org/packages/88/27/0195d15fe7a897cbcba0904792c4b7c9fdd958456c3a17d2ea6093716a9a/ruff-0.15.11-py3-none-win32.whl", hash = "sha256:476a2aa56b7da0b73a3ee80b6b2f0e19cce544245479adde7baa65466664d5f3", size = 10655535, upload-time = "2026-04-16T18:46:12.47Z" },
+ { url = "https://files.pythonhosted.org/packages/3a/5e/c927b325bd4c1d3620211a4b96f47864633199feed60fa936025ab27e090/ruff-0.15.11-py3-none-win_amd64.whl", hash = "sha256:8b6756d88d7e234fb0c98c91511aae3cd519d5e3ed271cae31b20f39cb2a12a3", size = 11779692, upload-time = "2026-04-16T18:46:17.268Z" },
+ { url = "https://files.pythonhosted.org/packages/63/b6/aeadee5443e49baa2facd51131159fd6301cc4ccfc1541e4df7b021c37dd/ruff-0.15.11-py3-none-win_arm64.whl", hash = "sha256:063fed18cc1bbe0ee7393957284a6fe8b588c6a406a285af3ee3f46da2391ee4", size = 11032614, upload-time = "2026-04-16T18:46:34.487Z" },
]
[[package]]
@@ -5268,6 +5418,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/4c/a7/563b2d8fb7edc07320bf69ac6a7eedcd7a1a9d663a6bb90a4d9bd2eda5f7/unpaddedbase64-2.1.0-py3-none-any.whl", hash = "sha256:485eff129c30175d2cd6f0cd8d2310dff51e666f7f36175f738d75dfdbd0b1c6", size = 6083, upload-time = "2021-03-09T11:35:46.7Z" },
]
+[[package]]
+name = "uritemplate"
+version = "4.2.0"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/98/60/f174043244c5306c9988380d2cb10009f91563fc4b31293d27e17201af56/uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e", size = 33267, upload-time = "2025-06-02T15:12:06.318Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/a9/99/3ae339466c9183ea5b8ae87b34c0b897eda475d2aec2307cae60e5cd4f29/uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686", size = 11488, upload-time = "2025-06-02T15:12:03.405Z" },
+]
+
[[package]]
name = "urllib3"
version = "2.6.3"
diff --git a/website/docs/developer-guide/adding-providers.md b/website/docs/developer-guide/adding-providers.md
index 793d0354d1..5ec127d663 100644
--- a/website/docs/developer-guide/adding-providers.md
+++ b/website/docs/developer-guide/adding-providers.md
@@ -93,6 +93,42 @@ This path includes everything from Path A plus:
11. `run_agent.py`
12. `pyproject.toml` if a provider SDK is required
+## Fast path: Simple API-key providers
+
+If your provider is just an OpenAI-compatible endpoint that authenticates with a single API key, you do not need to touch `auth.py`, `runtime_provider.py`, `main.py`, or any of the other files in the full checklist below.
+
+All you need is:
+
+1. A file in `providers/` (e.g. `providers/myprovider.py`) that calls `register_provider()` with the provider config.
+2. That's it. `auth.py` auto-registers every file in `providers/` at startup via a module-level import sweep.
+
+When you add a `providers/*.py` file and call `register_provider()`, the following wire up automatically:
+
+1. `PROVIDER_REGISTRY` entry in `auth.py` (credential resolution, env-var lookup)
+2. `api_mode` set to `chat_completions`
+3. `base_url` sourced from the config or the declared env var
+4. `env_vars` checked in priority order for the API key
+5. `fallback_models` list registered for the provider
+6. `--provider` CLI flag accepts the provider id
+7. `hermes model` menu includes the provider
+8. `hermes setup` wizard delegates to `main.py` automatically
+9. `provider:model` alias syntax works
+10. Runtime resolver returns the correct `base_url` and `api_key`
+11. `HERMES_INFERENCE_PROVIDER` env-var override accepts the provider id
+12. Fallback model activation can switch into the provider cleanly
+
+Use `providers/nvidia.py` or `providers/gmi.py` as a template.
+
+## Full path: OAuth and complex providers
+
+Use the full checklist below when your provider needs any of the following:
+
+- OAuth or token refresh (Nous Portal, Codex, Google Gemini, Qwen Portal, Copilot)
+- A non-OpenAI API shape that requires a new adapter (Anthropic Messages, Codex Responses)
+- Custom endpoint detection or multi-region probing (z.ai, Kimi)
+- A curated static model catalog or live `/models` fetch
+- Provider-specific `hermes model` menu entries with bespoke auth flows
+
## Step 1: Pick one canonical provider id
Choose a single provider id and use it everywhere.
diff --git a/website/docs/developer-guide/provider-runtime.md b/website/docs/developer-guide/provider-runtime.md
index bf9abe0ce5..beece151cc 100644
--- a/website/docs/developer-guide/provider-runtime.md
+++ b/website/docs/developer-guide/provider-runtime.md
@@ -20,6 +20,9 @@ Primary implementation:
- `hermes_cli/auth.py` — provider registry, `resolve_provider()`
- `hermes_cli/model_switch.py` — shared `/model` switch pipeline (CLI + gateway)
- `agent/auxiliary_client.py` — auxiliary model routing
+- `providers/` — declarative source for `api_mode`, `base_url`, `env_vars`, `fallback_models` (auto-registered into `auth.py` `PROVIDER_REGISTRY` at startup)
+
+`get_provider_profile()` in `providers/` returns a typed dict for a given provider id. `runtime_provider.py` calls this at resolution time to get the canonical `base_url`, `env_vars` priority list, `api_mode`, and `fallback_models` without needing to duplicate that data in multiple files. Adding a new `providers/*.py` file that calls `register_provider()` is enough for `runtime_provider.py` to pick it up — no branch needed in the resolver itself.
If you are trying to add a new first-class inference provider, read [Adding Providers](./adding-providers.md) alongside this page.
diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md
index c91bf6e007..a1f4d2201a 100644
--- a/website/docs/integrations/providers.md
+++ b/website/docs/integrations/providers.md
@@ -423,6 +423,44 @@ model:
For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change.
:::
+### GMI Cloud
+
+Open and reasoning models via [GMI Cloud](https://inference.gmi.ai) — OpenAI-compatible API, API key authentication.
+
+```bash
+# GMI Cloud
+hermes chat --provider gmi --model deepseek-ai/DeepSeek-R1
+# Requires: GMI_API_KEY in ~/.hermes/.env
+```
+
+Or set it permanently in `config.yaml`:
+```yaml
+model:
+ provider: "gmi"
+ default: "deepseek-ai/DeepSeek-R1"
+```
+
+The base URL can be overridden with `GMI_BASE_URL` (default: `https://api.gmi.ai/v1`).
+
+### StepFun
+
+Step-series models via [StepFun](https://platform.stepfun.com) — OpenAI-compatible API, API key authentication.
+
+```bash
+# StepFun
+hermes chat --provider stepfun --model step-3-mini
+# Requires: STEPFUN_API_KEY in ~/.hermes/.env
+```
+
+Or set it permanently in `config.yaml`:
+```yaml
+model:
+ provider: "stepfun"
+ default: "step-3-mini"
+```
+
+The base URL can be overridden with `STEPFUN_BASE_URL` (default: `https://api.stepfun.com/v1`).
+
### Hugging Face Inference Providers
[Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover.
@@ -1178,7 +1216,7 @@ fallback_model:
When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session.
-Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `custom`.
+Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `gmi`, `stepfun`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `custom`.
:::tip
Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers).
diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md
index f324edf160..ad1f484bc7 100644
--- a/website/docs/reference/environment-variables.md
+++ b/website/docs/reference/environment-variables.md
@@ -65,6 +65,10 @@ All variables go in `~/.hermes/.env`. You can also set them with `hermes config
| `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL |
| `NVIDIA_API_KEY` | NVIDIA NIM API key — Nemotron and open models ([build.nvidia.com](https://build.nvidia.com)) |
| `NVIDIA_BASE_URL` | Override NVIDIA base URL (default: `https://integrate.api.nvidia.com/v1`; set to `http://localhost:8000/v1` for a local NIM endpoint) |
+| `GMI_API_KEY` | GMI Cloud API key — open and reasoning models ([inference.gmi.ai](https://inference.gmi.ai)) |
+| `GMI_BASE_URL` | Override GMI Cloud base URL (default: `https://api.gmi.ai/v1`) |
+| `STEPFUN_API_KEY` | StepFun API key — Step-series models ([platform.stepfun.com](https://platform.stepfun.com)) |
+| `STEPFUN_BASE_URL` | Override StepFun base URL (default: `https://api.stepfun.com/v1`) |
| `OLLAMA_API_KEY` | Ollama Cloud API key — managed Ollama catalog without local GPU ([ollama.com/settings/keys](https://ollama.com/settings/keys)) |
| `OLLAMA_BASE_URL` | Override Ollama Cloud base URL (default: `https://ollama.com/v1`) |
| `XAI_API_KEY` | xAI (Grok) API key for chat + TTS ([console.x.ai](https://console.x.ai/)) |
@@ -91,7 +95,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe
| Variable | Description |
|----------|-------------|
-| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
+| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) |
| `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) |
| `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL |
| `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) |
diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md
index a0d699dfb2..b922bd7d6e 100644
--- a/website/docs/user-guide/features/fallback-providers.md
+++ b/website/docs/user-guide/features/fallback-providers.md
@@ -48,6 +48,8 @@ Both `provider` and `model` are **required**. If either is missing, the fallback
| MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` |
| DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` |
| NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) |
+| GMI Cloud | `gmi` | `GMI_API_KEY` (optional: `GMI_BASE_URL`) |
+| StepFun | `stepfun` | `STEPFUN_API_KEY` (optional: `STEPFUN_BASE_URL`) |
| Ollama Cloud | `ollama-cloud` | `OLLAMA_API_KEY` |
| Google Gemini (OAuth) | `google-gemini-cli` | `hermes model` (Google OAuth; optional: `HERMES_GEMINI_PROJECT_ID`) |
| Google AI Studio | `gemini` | `GOOGLE_API_KEY` (alias: `GEMINI_API_KEY`) |