From 84d1673e2fa829755fcffa0b09c6cb323c11a2f7 Mon Sep 17 00:00:00 2001
From: kshitijk4poor <82637225+kshitijk4poor@users.noreply.github.com>
Date: Tue, 28 Apr 2026 08:56:12 +0530
Subject: [PATCH] =?UTF-8?q?feat:=20provider=20modules=20=E2=80=94=20Provid?=
 =?UTF-8?q?erProfile=20ABC,=2029=20providers,=20fetch=5Fmodels,=20transpor?=
 =?UTF-8?q?t=20single-path?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

feat: provider modules — ProviderProfile ABC, 29 providers, fetch_models, transport single-path

Introduces providers/ as the single source of truth for every inference
provider. All 29 providers are declared, with their data cross-checked
against auth.py, runtime_provider.py, and auxiliary_client.py.

Rebased onto main (30307a980). Incorporates post-salvage fixes from
56724147e (gmi aux model google/gemini-3.1-flash-lite-preview, already
set in providers/gmi.py).
---
 acp_adapter/copilot_client.py | 632 +++++++++++++++++
 agent/auxiliary_client.py | 78 ++-
 agent/copilot_acp_client.py | 648 +-----------------
 agent/model_metadata.py | 11 +
 agent/transports/__init__.py | 14 +-
 agent/transports/chat_completions.py | 312 +++++----
 agent/transports/types.py | 29 +-
 hermes_cli/auth.py | 42 ++
 hermes_cli/config.py | 42 ++
 hermes_cli/doctor.py | 104 ++-
 hermes_cli/main.py | 58 +-
 hermes_cli/models.py | 47 ++
 hermes_cli/runtime_provider.py | 33 +-
 providers/README.md | 307 +++++++++
 providers/__init__.py | 76 ++
 providers/alibaba.py | 13 +
 providers/anthropic.py | 52 ++
 providers/arcee.py | 13 +
 providers/base.py | 165 +++++
 providers/bedrock.py | 29 +
 providers/copilot.py | 58 ++
 providers/copilot_acp.py | 34 +
 providers/custom.py | 71 ++
 providers/deepseek.py | 20 +
 providers/gemini.py | 34 +
 providers/gmi.py | 26 +
 providers/huggingface.py | 20 +
 providers/kilocode.py | 14 +
 providers/kimi.py | 71 ++
 providers/minimax.py | 31 +
 providers/nous.py | 53 ++
 providers/nvidia.py | 21 +
 providers/ollama_cloud.py | 14 +
 providers/openai_codex.py | 15 +
 providers/opencode.py | 30 +
 providers/openrouter.py | 86 +++
 providers/qwen.py | 82 +++
 providers/stepfun.py | 14 +
 providers/vercel.py | 43 ++
 providers/xai.py | 15 +
 providers/xiaomi.py | 13 +
 providers/zai.py | 21 +
 pyproject.toml | 2 +-
 run_agent.py | 165 +++--
 tests/agent/test_copilot_acp_client.py | 4 +-
 tests/agent/test_minimax_provider.py | 14 +-
 .../agent/transports/test_chat_completions.py | 68 +-
 tests/hermes_cli/test_gmi_provider.py | 4 +-
 tests/providers/__init__.py | 0
 tests/providers/test_e2e_wiring.py | 118 ++++
 tests/providers/test_profile_wiring.py | 290 ++++++++
 tests/providers/test_provider_profiles.py | 203 ++++++
 tests/providers/test_transport_parity.py | 258 +++++++
 tests/run_agent/test_run_agent.py | 56 +-
 uv.lock | 213 +++++-
 .../docs/developer-guide/adding-providers.md | 36 +
 .../docs/developer-guide/provider-runtime.md | 3 +
 website/docs/integrations/providers.md | 40 +-
 .../docs/reference/environment-variables.md | 6 +-
 .../user-guide/features/fallback-providers.md | 2 +
 60 files changed, 3939 insertions(+), 1034 deletions(-)
 create mode 100644 acp_adapter/copilot_client.py
 create mode 100644 providers/README.md
 create mode 100644 providers/__init__.py
 create mode 100644 providers/alibaba.py
 create mode 100644 providers/anthropic.py
 create mode 100644 providers/arcee.py
 create mode 100644 providers/base.py
 create mode 100644 providers/bedrock.py
 create mode 100644 providers/copilot.py
 create mode 100644 providers/copilot_acp.py
 create mode 100644 providers/custom.py
 create mode 100644 
providers/deepseek.py
 create mode 100644 providers/gemini.py
 create mode 100644 providers/gmi.py
 create mode 100644 providers/huggingface.py
 create mode 100644 providers/kilocode.py
 create mode 100644 providers/kimi.py
 create mode 100644 providers/minimax.py
 create mode 100644 providers/nous.py
 create mode 100644 providers/nvidia.py
 create mode 100644 providers/ollama_cloud.py
 create mode 100644 providers/openai_codex.py
 create mode 100644 providers/opencode.py
 create mode 100644 providers/openrouter.py
 create mode 100644 providers/qwen.py
 create mode 100644 providers/stepfun.py
 create mode 100644 providers/vercel.py
 create mode 100644 providers/xai.py
 create mode 100644 providers/xiaomi.py
 create mode 100644 providers/zai.py
 create mode 100644 tests/providers/__init__.py
 create mode 100644 tests/providers/test_e2e_wiring.py
 create mode 100644 tests/providers/test_profile_wiring.py
 create mode 100644 tests/providers/test_provider_profiles.py
 create mode 100644 tests/providers/test_transport_parity.py

diff --git a/acp_adapter/copilot_client.py b/acp_adapter/copilot_client.py
new file mode 100644
index 0000000000..e6a08e5975
--- /dev/null
+++ b/acp_adapter/copilot_client.py
@@ -0,0 +1,632 @@
+"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`.
+
+This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style
+backend. Each request starts a short-lived ACP session, sends the formatted
+conversation as a single prompt, collects text chunks, and converts the result
+back into the minimal shape Hermes expects from an OpenAI client.
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import queue
+import re
+import shlex
+import subprocess
+import threading
+import time
+from collections import deque
+from pathlib import Path
+from types import SimpleNamespace
+from typing import Any
+
+from agent.file_safety import get_read_block_error, is_write_denied
+from agent.redact import redact_sensitive_text
+
+ACP_MARKER_BASE_URL = "acp://copilot"
+_DEFAULT_TIMEOUT_SECONDS = 900.0
+
+_TOOL_CALL_BLOCK_RE = re.compile(r"<tool_call>\s*(\{.*?\})\s*</tool_call>", re.DOTALL)
+_TOOL_CALL_JSON_RE = re.compile(
+    r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}",
+    re.DOTALL,
+)
+
+
+def _resolve_command() -> str:
+    return (
+        os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip()
+        or os.getenv("COPILOT_CLI_PATH", "").strip()
+        or "copilot"
+    )
+
+
+def _resolve_args() -> list[str]:
+    raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip()
+    if not raw:
+        return ["--acp", "--stdio"]
+    return shlex.split(raw)
+
+
+def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": message_id,
+        "error": {
+            "code": code,
+            "message": message,
+        },
+    }
+
+
+def _permission_denied(message_id: Any) -> dict[str, Any]:
+    return {
+        "jsonrpc": "2.0",
+        "id": message_id,
+        "result": {
+            "outcome": {
+                "outcome": "cancelled",
+            }
+        },
+    }
+
+
+def _format_messages_as_prompt(
+    messages: list[dict[str, Any]],
+    model: str | None = None,
+    tools: list[dict[str, Any]] | None = None,
+    tool_choice: Any = None,
+) -> str:
+    sections: list[str] = [
+        "You are being used as the active ACP agent backend for Hermes.",
+        "Use ACP capabilities to complete tasks.",
+        "IMPORTANT: If you take an action with a tool, you MUST output tool calls using <tool_call>{...}</tool_call> blocks with JSON exactly in OpenAI function-call shape.",
+        "If no tool is needed, answer normally.",
+    ]
+    if model:
+        sections.append(f"Hermes requested model hint: {model}")
+
+    if isinstance(tools, list) and tools:
+        tool_specs: list[dict[str, Any]] = []
+        for t in tools:
+            if not isinstance(t, dict):
+                continue
+            fn = t.get("function") or {}
+            if not isinstance(fn, dict):
+                continue
+            name = fn.get("name")
+            if not isinstance(name, str) or not name.strip():
+                continue
+            tool_specs.append(
+                {
+                    "name": name.strip(),
+                    "description": fn.get("description", ""),
+                    "parameters": fn.get("parameters", {}),
+                }
+            )
+        if tool_specs:
+            sections.append(
+                "Available tools (OpenAI function schema). "
+                "When using a tool, emit ONLY <tool_call>{...}</tool_call> with one JSON object "
+                "containing id/type/function{name,arguments}. arguments must be a JSON string.\n"
+                + json.dumps(tool_specs, ensure_ascii=False)
+            )
+
+    if tool_choice is not None:
+        sections.append(
+            f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}"
+        )
+
+    transcript: list[str] = []
+    for message in messages:
+        if not isinstance(message, dict):
+            continue
+        role = str(message.get("role") or "unknown").strip().lower()
+        if role == "tool":
+            role = "tool"
+        elif role not in {"system", "user", "assistant"}:
+            role = "context"
+
+        content = message.get("content")
+        rendered = _render_message_content(content)
+        if not rendered:
+            continue
+
+        label = {
+            "system": "System",
+            "user": "User",
+            "assistant": "Assistant",
+            "tool": "Tool",
+            "context": "Context",
+        }.get(role, role.title())
+        transcript.append(f"{label}:\n{rendered}")
+
+    if transcript:
+        sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript))
+
+    sections.append("Continue the conversation from the latest user request.")
+    return "\n\n".join(
+        section.strip() for section in sections if section and section.strip()
+    )
+
+
+def _render_message_content(content: Any) -> str:
+    if content is None:
+        return ""
+    if isinstance(content, str):
+        return content.strip()
+    if isinstance(content, dict):
+        if "text" in content:
+            return str(content.get("text") or "").strip()
+        if "content" in content and isinstance(content.get("content"), str):
+            return str(content.get("content") or "").strip()
+        return json.dumps(content, ensure_ascii=True)
+    if isinstance(content, list):
+        parts: list[str] = []
+        for item in content:
+            if isinstance(item, str):
+                parts.append(item)
+            elif isinstance(item, dict):
+                text = item.get("text")
+                if isinstance(text, str) and text.strip():
+                    parts.append(text.strip())
+        return "\n".join(parts).strip()
+    return str(content).strip()
+
+
+def _extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]:
+    if not isinstance(text, str) or not text.strip():
+        return [], ""
+
+    extracted: list[SimpleNamespace] = []
+    consumed_spans: list[tuple[int, int]] = []
+
+    def _try_add_tool_call(raw_json: str) -> None:
+        try:
+            obj = json.loads(raw_json)
+        except Exception:
+            return
+        if not isinstance(obj, dict):
+            return
+        fn = obj.get("function")
+        if not isinstance(fn, dict):
+            return
+        fn_name = fn.get("name")
+        if not isinstance(fn_name, str) or not fn_name.strip():
+            return
+        fn_args = fn.get("arguments", "{}")
+        if not isinstance(fn_args, str):
+            fn_args = json.dumps(fn_args, ensure_ascii=False)
+        call_id = obj.get("id")
+        if not isinstance(call_id, str) or not call_id.strip():
+            call_id = f"acp_call_{len(extracted) + 1}"
+
+        extracted.append(
+            SimpleNamespace(
+                id=call_id,
+                call_id=call_id,
+                response_item_id=None,
+                type="function",
+                function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args),
+            )
+        )
+
+    for m in _TOOL_CALL_BLOCK_RE.finditer(text):
+        raw = 
m.group(1) + _try_add_tool_call(raw) + consumed_spans.append((m.start(), m.end())) + + # Only try bare-JSON fallback when no XML blocks were found. + if not extracted: + for m in _TOOL_CALL_JSON_RE.finditer(text): + raw = m.group(0) + _try_add_tool_call(raw) + consumed_spans.append((m.start(), m.end())) + + if not consumed_spans: + return extracted, text.strip() + + consumed_spans.sort() + merged: list[tuple[int, int]] = [] + for start, end in consumed_spans: + if not merged or start > merged[-1][1]: + merged.append((start, end)) + else: + merged[-1] = (merged[-1][0], max(merged[-1][1], end)) + + parts: list[str] = [] + cursor = 0 + for start, end in merged: + if cursor < start: + parts.append(text[cursor:start]) + cursor = max(cursor, end) + if cursor < len(text): + parts.append(text[cursor:]) + + cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip() + return extracted, cleaned + + +def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path: + candidate = Path(path_text) + if not candidate.is_absolute(): + raise PermissionError("ACP file-system paths must be absolute.") + resolved = candidate.resolve() + root = Path(cwd).resolve() + try: + resolved.relative_to(root) + except ValueError as exc: + raise PermissionError( + f"Path '{resolved}' is outside the session cwd '{root}'." + ) from exc + return resolved + + +class _ACPChatCompletions: + def __init__(self, client: CopilotACPClient): + self._client = client + + def create(self, **kwargs: Any) -> Any: + return self._client._create_chat_completion(**kwargs) + + +class _ACPChatNamespace: + def __init__(self, client: CopilotACPClient): + self.completions = _ACPChatCompletions(client) + + +class CopilotACPClient: + """Minimal OpenAI-client-compatible facade for Copilot ACP.""" + + def __init__( + self, + *, + api_key: str | None = None, + base_url: str | None = None, + default_headers: dict[str, str] | None = None, + acp_command: str | None = None, + acp_args: list[str] | None = None, + acp_cwd: str | None = None, + command: str | None = None, + args: list[str] | None = None, + **_: Any, + ): + self.api_key = api_key or "copilot-acp" + self.base_url = base_url or ACP_MARKER_BASE_URL + self._default_headers = dict(default_headers or {}) + self._acp_command = acp_command or command or _resolve_command() + self._acp_args = list(acp_args or args or _resolve_args()) + self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve()) + self.chat = _ACPChatNamespace(self) + self.is_closed = False + self._active_process: subprocess.Popen[str] | None = None + self._active_process_lock = threading.Lock() + + def close(self) -> None: + proc: subprocess.Popen[str] | None + with self._active_process_lock: + proc = self._active_process + self._active_process = None + self.is_closed = True + if proc is None: + return + try: + proc.terminate() + proc.wait(timeout=2) + except Exception: + try: + proc.kill() + except Exception: + pass + + def _create_chat_completion( + self, + *, + model: str | None = None, + messages: list[dict[str, Any]] | None = None, + timeout: float | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: Any = None, + **_: Any, + ) -> Any: + prompt_text = _format_messages_as_prompt( + messages or [], + model=model, + tools=tools, + tool_choice=tool_choice, + ) + # Normalise timeout: run_agent.py may pass an httpx.Timeout object + # (used natively by the OpenAI SDK) rather than a plain float. 
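+        # For example, httpx.Timeout(connect=5.0, read=600.0, write=60.0,
+        # pool=5.0) normalises to 600.0 here: the max() over its numeric
+        # components, taken below.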
+ if timeout is None: + _effective_timeout = _DEFAULT_TIMEOUT_SECONDS + elif isinstance(timeout, (int, float)): + _effective_timeout = float(timeout) + else: + # httpx.Timeout or similar — pick the largest component so the + # subprocess has enough wall-clock time for the full response. + _candidates = [ + getattr(timeout, attr, None) + for attr in ("read", "write", "connect", "pool", "timeout") + ] + _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))] + _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS + + response_text, reasoning_text = self._run_prompt( + prompt_text, + timeout_seconds=_effective_timeout, + ) + + tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text) + + usage = SimpleNamespace( + prompt_tokens=0, + completion_tokens=0, + total_tokens=0, + prompt_tokens_details=SimpleNamespace(cached_tokens=0), + ) + assistant_message = SimpleNamespace( + content=cleaned_text, + tool_calls=tool_calls, + reasoning=reasoning_text or None, + reasoning_content=reasoning_text or None, + reasoning_details=None, + ) + finish_reason = "tool_calls" if tool_calls else "stop" + choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason) + return SimpleNamespace( + choices=[choice], + usage=usage, + model=model or "copilot-acp", + ) + + def _run_prompt( + self, prompt_text: str, *, timeout_seconds: float + ) -> tuple[str, str]: + try: + proc = subprocess.Popen( + [self._acp_command] + self._acp_args, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1, + cwd=self._acp_cwd, + ) + except FileNotFoundError as exc: + raise RuntimeError( + f"Could not start Copilot ACP command '{self._acp_command}'. " + "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH." 
+ ) from exc + + if proc.stdin is None or proc.stdout is None: + proc.kill() + raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.") + + self.is_closed = False + with self._active_process_lock: + self._active_process = proc + + inbox: queue.Queue[dict[str, Any]] = queue.Queue() + stderr_tail: deque[str] = deque(maxlen=40) + + def _stdout_reader() -> None: + if proc.stdout is None: + return + for line in proc.stdout: + try: + inbox.put(json.loads(line)) + except Exception: + inbox.put({"raw": line.rstrip("\n")}) + + def _stderr_reader() -> None: + if proc.stderr is None: + return + for line in proc.stderr: + stderr_tail.append(line.rstrip("\n")) + + out_thread = threading.Thread(target=_stdout_reader, daemon=True) + err_thread = threading.Thread(target=_stderr_reader, daemon=True) + out_thread.start() + err_thread.start() + + next_id = 0 + + def _request( + method: str, + params: dict[str, Any], + *, + text_parts: list[str] | None = None, + reasoning_parts: list[str] | None = None, + ) -> Any: + nonlocal next_id + next_id += 1 + request_id = next_id + payload = { + "jsonrpc": "2.0", + "id": request_id, + "method": method, + "params": params, + } + assert proc.stdin is not None # always set: Popen(stdin=PIPE) + proc.stdin.write(json.dumps(payload) + "\n") + proc.stdin.flush() + + deadline = time.time() + timeout_seconds + while time.time() < deadline: + if proc.poll() is not None: + break + try: + msg = inbox.get(timeout=0.1) + except queue.Empty: + continue + + if self._handle_server_message( + msg, + process=proc, + cwd=self._acp_cwd, + text_parts=text_parts, + reasoning_parts=reasoning_parts, + ): + continue + + if msg.get("id") != request_id: + continue + if "error" in msg: + err = msg.get("error") or {} + raise RuntimeError( + f"Copilot ACP {method} failed: {err.get('message') or err}" + ) + return msg.get("result") + + stderr_text = "\n".join(stderr_tail).strip() + if proc.poll() is not None and stderr_text: + raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}") + raise TimeoutError( + f"Timed out waiting for Copilot ACP response to {method}." 
+ ) + + try: + _request( + "initialize", + { + "protocolVersion": 1, + "clientCapabilities": { + "fs": { + "readTextFile": True, + "writeTextFile": True, + } + }, + "clientInfo": { + "name": "hermes-agent", + "title": "Hermes Agent", + "version": "0.0.0", + }, + }, + ) + session = ( + _request( + "session/new", + { + "cwd": self._acp_cwd, + "mcpServers": [], + }, + ) + or {} + ) + session_id = str(session.get("sessionId") or "").strip() + if not session_id: + raise RuntimeError("Copilot ACP did not return a sessionId.") + + text_parts: list[str] = [] + reasoning_parts: list[str] = [] + _request( + "session/prompt", + { + "sessionId": session_id, + "prompt": [ + { + "type": "text", + "text": prompt_text, + } + ], + }, + text_parts=text_parts, + reasoning_parts=reasoning_parts, + ) + return "".join(text_parts), "".join(reasoning_parts) + finally: + self.close() + + def _handle_server_message( + self, + msg: dict[str, Any], + *, + process: subprocess.Popen[str], + cwd: str, + text_parts: list[str] | None, + reasoning_parts: list[str] | None, + ) -> bool: + method = msg.get("method") + if not isinstance(method, str): + return False + + if method == "session/update": + params = msg.get("params") or {} + update = params.get("update") or {} + kind = str(update.get("sessionUpdate") or "").strip() + content = update.get("content") or {} + chunk_text = "" + if isinstance(content, dict): + chunk_text = str(content.get("text") or "") + if kind == "agent_message_chunk" and chunk_text and text_parts is not None: + text_parts.append(chunk_text) + elif ( + kind == "agent_thought_chunk" + and chunk_text + and reasoning_parts is not None + ): + reasoning_parts.append(chunk_text) + return True + + if process.stdin is None: + return True + + message_id = msg.get("id") + params = msg.get("params") or {} + + if method == "session/request_permission": + response = _permission_denied(message_id) + elif method == "fs/read_text_file": + try: + path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + block_error = get_read_block_error(str(path)) + if block_error: + raise PermissionError(block_error) + content = path.read_text() if path.exists() else "" + line = params.get("line") + limit = params.get("limit") + if isinstance(line, int) and line > 1: + lines = content.splitlines(keepends=True) + start = line - 1 + end = ( + start + limit if isinstance(limit, int) and limit > 0 else None + ) + content = "".join(lines[start:end]) + if content: + content = redact_sensitive_text(content) + response = { + "jsonrpc": "2.0", + "id": message_id, + "result": { + "content": content, + }, + } + except Exception as exc: + response = _jsonrpc_error(message_id, -32602, str(exc)) + elif method == "fs/write_text_file": + try: + path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) + if is_write_denied(str(path)): + raise PermissionError( + f"Write denied: '{path}' is a protected system/credential file." 
+ ) + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(str(params.get("content") or "")) + response = { + "jsonrpc": "2.0", + "id": message_id, + "result": None, + } + except Exception as exc: + response = _jsonrpc_error(message_id, -32602, str(exc)) + else: + response = _jsonrpc_error( + message_id, + -32601, + f"ACP client method '{method}' is not supported by Hermes yet.", + ) + + process.stdin.write(json.dumps(response) + "\n") + process.stdin.flush() + return True diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index 13fb1c8924..6d34dabe55 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -151,23 +151,31 @@ def _fixed_temperature_for_model( return None # Default auxiliary models for direct API-key providers (cheap/fast for side tasks) -_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = { - "gemini": "gemini-3-flash-preview", - "zai": "glm-4.5-flash", - "kimi-coding": "kimi-k2-turbo-preview", - "stepfun": "step-3.5-flash", - "kimi-coding-cn": "kimi-k2-turbo-preview", - "gmi": "google/gemini-3.1-flash-lite-preview", - "minimax": "MiniMax-M2.7", - "minimax-cn": "MiniMax-M2.7", +def _get_aux_model_for_provider(provider_id: str) -> str: + """Return the cheap auxiliary model for a provider. + + Reads from ProviderProfile.default_aux_model first, falling back to the + legacy hardcoded dict for providers that predate the profiles system. + """ + try: + from providers import get_provider_profile + _p = get_provider_profile(provider_id) + if _p and _p.default_aux_model: + return _p.default_aux_model + except Exception: + pass + return _API_KEY_PROVIDER_AUX_MODELS_FALLBACK.get(provider_id, "") + + +# Fallback for providers not yet migrated to ProviderProfile.default_aux_model. +# New providers should set default_aux_model on their profile instead. +_API_KEY_PROVIDER_AUX_MODELS_FALLBACK: Dict[str, str] = { "anthropic": "claude-haiku-4-5-20251001", - "ai-gateway": "google/gemini-3-flash", - "opencode-zen": "gemini-3-flash", - "opencode-go": "glm-5", - "kilocode": "google/gemini-3-flash-preview", - "ollama-cloud": "nemotron-3-nano:30b", } +# Legacy alias — callers that haven't been updated yet can still use this. +_API_KEY_PROVIDER_AUX_MODELS: Dict[str, str] = _API_KEY_PROVIDER_AUX_MODELS_FALLBACK + # Vision-specific model overrides for direct providers. # When the user's main provider has a dedicated vision/multimodal model that # differs from their main chat model, map it here. 
The vision auto-detect @@ -868,7 +876,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: base_url = _to_openai_base_url( _pool_runtime_base_url(entry, pconfig.inference_base_url) or pconfig.inference_base_url ) - model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id) + model = _get_aux_model_for_provider(provider_id) or None if model is None: continue # skip provider if we don't know a valid aux model logger.debug("Auxiliary text client: %s (%s) via pool", pconfig.name, model) @@ -877,14 +885,22 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: if is_native_gemini_base_url(base_url): return GeminiNativeClient(api_key=api_key, base_url=base_url), model - extra = {} - if base_url_host_matches(base_url, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} - elif base_url_host_matches(base_url, "api.githubcopilot.com"): - from hermes_cli.models import copilot_default_headers + extra = {} + if base_url_host_matches(base_url, "api.kimi.com"): + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} + elif base_url_host_matches(base_url, "api.githubcopilot.com"): + from hermes_cli.models import copilot_default_headers - extra["default_headers"] = copilot_default_headers() - return OpenAI(api_key=api_key, base_url=base_url, **extra), model + extra["default_headers"] = copilot_default_headers() + else: + try: + from providers import get_provider_profile as _gpf_aux + _ph_aux = _gpf_aux(provider_id) + if _ph_aux and _ph_aux.default_headers: + extra["default_headers"] = dict(_ph_aux.default_headers) + except Exception: + pass + return OpenAI(api_key=api_key, base_url=base_url, **extra), model creds = resolve_api_key_provider_credentials(provider_id) api_key = str(creds.get("api_key", "")).strip() @@ -894,7 +910,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: base_url = _to_openai_base_url( str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url ) - model = _API_KEY_PROVIDER_AUX_MODELS.get(provider_id) + model = _get_aux_model_for_provider(provider_id) or None if model is None: continue # skip provider if we don't know a valid aux model logger.debug("Auxiliary text client: %s (%s)", pconfig.name, model) @@ -910,6 +926,14 @@ def _resolve_api_key_provider() -> Tuple[Optional[OpenAI], Optional[str]]: from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() + else: + try: + from providers import get_provider_profile as _gpf_aux2 + _ph_aux2 = _gpf_aux2(provider_id) + if _ph_aux2 and _ph_aux2.default_headers: + extra["default_headers"] = dict(_ph_aux2.default_headers) + except Exception: + pass return OpenAI(api_key=api_key, base_url=base_url, **extra), model return None, None @@ -1258,7 +1282,7 @@ def _try_anthropic() -> Tuple[Optional[Any], Optional[str]]: from agent.anthropic_adapter import _is_oauth_token is_oauth = _is_oauth_token(token) - model = _API_KEY_PROVIDER_AUX_MODELS.get("anthropic", "claude-haiku-4-5-20251001") + model = _get_aux_model_for_provider("anthropic") or "claude-haiku-4-5-20251001" logger.debug("Auxiliary client: Anthropic native (%s) at %s (oauth=%s)", model, base_url, is_oauth) try: real_client = build_anthropic_client(token, base_url) @@ -1642,7 +1666,7 @@ def _to_async_client(sync_client, model: str, is_vision: bool = False): except ImportError: pass try: - from agent.copilot_acp_client import CopilotACPClient + from acp_adapter.copilot_client import CopilotACPClient if 
isinstance(sync_client, CopilotACPClient): return sync_client, model except ImportError: @@ -1986,7 +2010,7 @@ def resolve_provider_client( str(creds.get("base_url", "")).strip().rstrip("/") or pconfig.inference_base_url ) - default_model = _API_KEY_PROVIDER_AUX_MODELS.get(provider, "") + default_model = _get_aux_model_for_provider(provider) final_model = _normalize_resolved_model(model or default_model, provider) if provider == "gemini": @@ -2056,7 +2080,7 @@ def resolve_provider_client( "process credentials are incomplete" ) return None, None - from agent.copilot_acp_client import CopilotACPClient + from acp_adapter.copilot_client import CopilotACPClient client = CopilotACPClient( api_key=api_key, diff --git a/agent/copilot_acp_client.py b/agent/copilot_acp_client.py index 94d40d2d97..6ed499c42f 100644 --- a/agent/copilot_acp_client.py +++ b/agent/copilot_acp_client.py @@ -1,646 +1,8 @@ -"""OpenAI-compatible shim that forwards Hermes requests to `copilot --acp`. +"""Backward-compatibility shim. -This adapter lets Hermes treat the GitHub Copilot ACP server as a chat-style -backend. Each request starts a short-lived ACP session, sends the formatted -conversation as a single prompt, collects text chunks, and converts the result -back into the minimal shape Hermes expects from an OpenAI client. +CopilotACPClient has moved to acp_adapter/copilot_client.py. +This module re-exports it so existing callers continue to work. """ +from acp_adapter.copilot_client import CopilotACPClient # noqa: F401 -from __future__ import annotations - -import json -import os -import queue -import re -import shlex -import subprocess -import threading -import time -from collections import deque -from pathlib import Path -from types import SimpleNamespace -from typing import Any - -from agent.file_safety import get_read_block_error, is_write_denied -from agent.redact import redact_sensitive_text - -ACP_MARKER_BASE_URL = "acp://copilot" -_DEFAULT_TIMEOUT_SECONDS = 900.0 - -_TOOL_CALL_BLOCK_RE = re.compile(r"\s*(\{.*?\})\s*", re.DOTALL) -_TOOL_CALL_JSON_RE = re.compile(r"\{\s*\"id\"\s*:\s*\"[^\"]+\"\s*,\s*\"type\"\s*:\s*\"function\"\s*,\s*\"function\"\s*:\s*\{.*?\}\s*\}", re.DOTALL) - - -def _resolve_command() -> str: - return ( - os.getenv("HERMES_COPILOT_ACP_COMMAND", "").strip() - or os.getenv("COPILOT_CLI_PATH", "").strip() - or "copilot" - ) - - -def _resolve_args() -> list[str]: - raw = os.getenv("HERMES_COPILOT_ACP_ARGS", "").strip() - if not raw: - return ["--acp", "--stdio"] - return shlex.split(raw) - - -def _resolve_home_dir() -> str: - """Return a stable HOME for child ACP processes.""" - - try: - from hermes_constants import get_subprocess_home - - profile_home = get_subprocess_home() - if profile_home: - return profile_home - except Exception: - pass - - home = os.environ.get("HOME", "").strip() - if home: - return home - - expanded = os.path.expanduser("~") - if expanded and expanded != "~": - return expanded - - try: - import pwd - - resolved = pwd.getpwuid(os.getuid()).pw_dir.strip() - if resolved: - return resolved - except Exception: - pass - - # Last resort: /tmp (writable on any POSIX system). Avoids crashing the - # subprocess with no HOME; callers can set HERMES_HOME explicitly if they - # need a different writable dir. 
- return "/tmp" - - -def _build_subprocess_env() -> dict[str, str]: - env = os.environ.copy() - env["HOME"] = _resolve_home_dir() - return env - - -def _jsonrpc_error(message_id: Any, code: int, message: str) -> dict[str, Any]: - return { - "jsonrpc": "2.0", - "id": message_id, - "error": { - "code": code, - "message": message, - }, - } - - -def _permission_denied(message_id: Any) -> dict[str, Any]: - return { - "jsonrpc": "2.0", - "id": message_id, - "result": { - "outcome": { - "outcome": "cancelled", - } - }, - } - - -def _format_messages_as_prompt( - messages: list[dict[str, Any]], - model: str | None = None, - tools: list[dict[str, Any]] | None = None, - tool_choice: Any = None, -) -> str: - sections: list[str] = [ - "You are being used as the active ACP agent backend for Hermes.", - "Use ACP capabilities to complete tasks.", - "IMPORTANT: If you take an action with a tool, you MUST output tool calls using {...} blocks with JSON exactly in OpenAI function-call shape.", - "If no tool is needed, answer normally.", - ] - if model: - sections.append(f"Hermes requested model hint: {model}") - - if isinstance(tools, list) and tools: - tool_specs: list[dict[str, Any]] = [] - for t in tools: - if not isinstance(t, dict): - continue - fn = t.get("function") or {} - if not isinstance(fn, dict): - continue - name = fn.get("name") - if not isinstance(name, str) or not name.strip(): - continue - tool_specs.append( - { - "name": name.strip(), - "description": fn.get("description", ""), - "parameters": fn.get("parameters", {}), - } - ) - if tool_specs: - sections.append( - "Available tools (OpenAI function schema). " - "When using a tool, emit ONLY {...} with one JSON object " - "containing id/type/function{name,arguments}. arguments must be a JSON string.\n" - + json.dumps(tool_specs, ensure_ascii=False) - ) - - if tool_choice is not None: - sections.append(f"Tool choice hint: {json.dumps(tool_choice, ensure_ascii=False)}") - - transcript: list[str] = [] - for message in messages: - if not isinstance(message, dict): - continue - role = str(message.get("role") or "unknown").strip().lower() - if role == "tool": - role = "tool" - elif role not in {"system", "user", "assistant"}: - role = "context" - - content = message.get("content") - rendered = _render_message_content(content) - if not rendered: - continue - - label = { - "system": "System", - "user": "User", - "assistant": "Assistant", - "tool": "Tool", - "context": "Context", - }.get(role, role.title()) - transcript.append(f"{label}:\n{rendered}") - - if transcript: - sections.append("Conversation transcript:\n\n" + "\n\n".join(transcript)) - - sections.append("Continue the conversation from the latest user request.") - return "\n\n".join(section.strip() for section in sections if section and section.strip()) - - -def _render_message_content(content: Any) -> str: - if content is None: - return "" - if isinstance(content, str): - return content.strip() - if isinstance(content, dict): - if "text" in content: - return str(content.get("text") or "").strip() - if "content" in content and isinstance(content.get("content"), str): - return str(content.get("content") or "").strip() - return json.dumps(content, ensure_ascii=True) - if isinstance(content, list): - parts: list[str] = [] - for item in content: - if isinstance(item, str): - parts.append(item) - elif isinstance(item, dict): - text = item.get("text") - if isinstance(text, str) and text.strip(): - parts.append(text.strip()) - return "\n".join(parts).strip() - return str(content).strip() - - -def 
_extract_tool_calls_from_text(text: str) -> tuple[list[SimpleNamespace], str]: - if not isinstance(text, str) or not text.strip(): - return [], "" - - extracted: list[SimpleNamespace] = [] - consumed_spans: list[tuple[int, int]] = [] - - def _try_add_tool_call(raw_json: str) -> None: - try: - obj = json.loads(raw_json) - except Exception: - return - if not isinstance(obj, dict): - return - fn = obj.get("function") - if not isinstance(fn, dict): - return - fn_name = fn.get("name") - if not isinstance(fn_name, str) or not fn_name.strip(): - return - fn_args = fn.get("arguments", "{}") - if not isinstance(fn_args, str): - fn_args = json.dumps(fn_args, ensure_ascii=False) - call_id = obj.get("id") - if not isinstance(call_id, str) or not call_id.strip(): - call_id = f"acp_call_{len(extracted)+1}" - - extracted.append( - SimpleNamespace( - id=call_id, - call_id=call_id, - response_item_id=None, - type="function", - function=SimpleNamespace(name=fn_name.strip(), arguments=fn_args), - ) - ) - - for m in _TOOL_CALL_BLOCK_RE.finditer(text): - raw = m.group(1) - _try_add_tool_call(raw) - consumed_spans.append((m.start(), m.end())) - - # Only try bare-JSON fallback when no XML blocks were found. - if not extracted: - for m in _TOOL_CALL_JSON_RE.finditer(text): - raw = m.group(0) - _try_add_tool_call(raw) - consumed_spans.append((m.start(), m.end())) - - if not consumed_spans: - return extracted, text.strip() - - consumed_spans.sort() - merged: list[tuple[int, int]] = [] - for start, end in consumed_spans: - if not merged or start > merged[-1][1]: - merged.append((start, end)) - else: - merged[-1] = (merged[-1][0], max(merged[-1][1], end)) - - parts: list[str] = [] - cursor = 0 - for start, end in merged: - if cursor < start: - parts.append(text[cursor:start]) - cursor = max(cursor, end) - if cursor < len(text): - parts.append(text[cursor:]) - - cleaned = "\n".join(p.strip() for p in parts if p and p.strip()).strip() - return extracted, cleaned - - - -def _ensure_path_within_cwd(path_text: str, cwd: str) -> Path: - candidate = Path(path_text) - if not candidate.is_absolute(): - raise PermissionError("ACP file-system paths must be absolute.") - resolved = candidate.resolve() - root = Path(cwd).resolve() - try: - resolved.relative_to(root) - except ValueError as exc: - raise PermissionError(f"Path '{resolved}' is outside the session cwd '{root}'.") from exc - return resolved - - -class _ACPChatCompletions: - def __init__(self, client: "CopilotACPClient"): - self._client = client - - def create(self, **kwargs: Any) -> Any: - return self._client._create_chat_completion(**kwargs) - - -class _ACPChatNamespace: - def __init__(self, client: "CopilotACPClient"): - self.completions = _ACPChatCompletions(client) - - -class CopilotACPClient: - """Minimal OpenAI-client-compatible facade for Copilot ACP.""" - - def __init__( - self, - *, - api_key: str | None = None, - base_url: str | None = None, - default_headers: dict[str, str] | None = None, - acp_command: str | None = None, - acp_args: list[str] | None = None, - acp_cwd: str | None = None, - command: str | None = None, - args: list[str] | None = None, - **_: Any, - ): - self.api_key = api_key or "copilot-acp" - self.base_url = base_url or ACP_MARKER_BASE_URL - self._default_headers = dict(default_headers or {}) - self._acp_command = acp_command or command or _resolve_command() - self._acp_args = list(acp_args or args or _resolve_args()) - self._acp_cwd = str(Path(acp_cwd or os.getcwd()).resolve()) - self.chat = _ACPChatNamespace(self) - self.is_closed = False 
- self._active_process: subprocess.Popen[str] | None = None - self._active_process_lock = threading.Lock() - - def close(self) -> None: - proc: subprocess.Popen[str] | None - with self._active_process_lock: - proc = self._active_process - self._active_process = None - self.is_closed = True - if proc is None: - return - try: - proc.terminate() - proc.wait(timeout=2) - except Exception: - try: - proc.kill() - except Exception: - pass - - def _create_chat_completion( - self, - *, - model: str | None = None, - messages: list[dict[str, Any]] | None = None, - timeout: float | None = None, - tools: list[dict[str, Any]] | None = None, - tool_choice: Any = None, - **_: Any, - ) -> Any: - prompt_text = _format_messages_as_prompt( - messages or [], - model=model, - tools=tools, - tool_choice=tool_choice, - ) - # Normalise timeout: run_agent.py may pass an httpx.Timeout object - # (used natively by the OpenAI SDK) rather than a plain float. - if timeout is None: - _effective_timeout = _DEFAULT_TIMEOUT_SECONDS - elif isinstance(timeout, (int, float)): - _effective_timeout = float(timeout) - else: - # httpx.Timeout or similar — pick the largest component so the - # subprocess has enough wall-clock time for the full response. - _candidates = [ - getattr(timeout, attr, None) - for attr in ("read", "write", "connect", "pool", "timeout") - ] - _numeric = [float(v) for v in _candidates if isinstance(v, (int, float))] - _effective_timeout = max(_numeric) if _numeric else _DEFAULT_TIMEOUT_SECONDS - - response_text, reasoning_text = self._run_prompt( - prompt_text, - timeout_seconds=_effective_timeout, - ) - - tool_calls, cleaned_text = _extract_tool_calls_from_text(response_text) - - usage = SimpleNamespace( - prompt_tokens=0, - completion_tokens=0, - total_tokens=0, - prompt_tokens_details=SimpleNamespace(cached_tokens=0), - ) - assistant_message = SimpleNamespace( - content=cleaned_text, - tool_calls=tool_calls, - reasoning=reasoning_text or None, - reasoning_content=reasoning_text or None, - reasoning_details=None, - ) - finish_reason = "tool_calls" if tool_calls else "stop" - choice = SimpleNamespace(message=assistant_message, finish_reason=finish_reason) - return SimpleNamespace( - choices=[choice], - usage=usage, - model=model or "copilot-acp", - ) - - def _run_prompt(self, prompt_text: str, *, timeout_seconds: float) -> tuple[str, str]: - try: - proc = subprocess.Popen( - [self._acp_command] + self._acp_args, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - bufsize=1, - cwd=self._acp_cwd, - env=_build_subprocess_env(), - ) - except FileNotFoundError as exc: - raise RuntimeError( - f"Could not start Copilot ACP command '{self._acp_command}'. " - "Install GitHub Copilot CLI or set HERMES_COPILOT_ACP_COMMAND/COPILOT_CLI_PATH." 
- ) from exc - - if proc.stdin is None or proc.stdout is None: - proc.kill() - raise RuntimeError("Copilot ACP process did not expose stdin/stdout pipes.") - - self.is_closed = False - with self._active_process_lock: - self._active_process = proc - - inbox: queue.Queue[dict[str, Any]] = queue.Queue() - stderr_tail: deque[str] = deque(maxlen=40) - - def _stdout_reader() -> None: - if proc.stdout is None: - return - for line in proc.stdout: - try: - inbox.put(json.loads(line)) - except Exception: - inbox.put({"raw": line.rstrip("\n")}) - - def _stderr_reader() -> None: - if proc.stderr is None: - return - for line in proc.stderr: - stderr_tail.append(line.rstrip("\n")) - - out_thread = threading.Thread(target=_stdout_reader, daemon=True) - err_thread = threading.Thread(target=_stderr_reader, daemon=True) - out_thread.start() - err_thread.start() - - next_id = 0 - - def _request(method: str, params: dict[str, Any], *, text_parts: list[str] | None = None, reasoning_parts: list[str] | None = None) -> Any: - nonlocal next_id - next_id += 1 - request_id = next_id - payload = { - "jsonrpc": "2.0", - "id": request_id, - "method": method, - "params": params, - } - proc.stdin.write(json.dumps(payload) + "\n") - proc.stdin.flush() - - deadline = time.time() + timeout_seconds - while time.time() < deadline: - if proc.poll() is not None: - break - try: - msg = inbox.get(timeout=0.1) - except queue.Empty: - continue - - if self._handle_server_message( - msg, - process=proc, - cwd=self._acp_cwd, - text_parts=text_parts, - reasoning_parts=reasoning_parts, - ): - continue - - if msg.get("id") != request_id: - continue - if "error" in msg: - err = msg.get("error") or {} - raise RuntimeError( - f"Copilot ACP {method} failed: {err.get('message') or err}" - ) - return msg.get("result") - - stderr_text = "\n".join(stderr_tail).strip() - if proc.poll() is not None and stderr_text: - raise RuntimeError(f"Copilot ACP process exited early: {stderr_text}") - raise TimeoutError(f"Timed out waiting for Copilot ACP response to {method}.") - - try: - _request( - "initialize", - { - "protocolVersion": 1, - "clientCapabilities": { - "fs": { - "readTextFile": True, - "writeTextFile": True, - } - }, - "clientInfo": { - "name": "hermes-agent", - "title": "Hermes Agent", - "version": "0.0.0", - }, - }, - ) - session = _request( - "session/new", - { - "cwd": self._acp_cwd, - "mcpServers": [], - }, - ) or {} - session_id = str(session.get("sessionId") or "").strip() - if not session_id: - raise RuntimeError("Copilot ACP did not return a sessionId.") - - text_parts: list[str] = [] - reasoning_parts: list[str] = [] - _request( - "session/prompt", - { - "sessionId": session_id, - "prompt": [ - { - "type": "text", - "text": prompt_text, - } - ], - }, - text_parts=text_parts, - reasoning_parts=reasoning_parts, - ) - return "".join(text_parts), "".join(reasoning_parts) - finally: - self.close() - - def _handle_server_message( - self, - msg: dict[str, Any], - *, - process: subprocess.Popen[str], - cwd: str, - text_parts: list[str] | None, - reasoning_parts: list[str] | None, - ) -> bool: - method = msg.get("method") - if not isinstance(method, str): - return False - - if method == "session/update": - params = msg.get("params") or {} - update = params.get("update") or {} - kind = str(update.get("sessionUpdate") or "").strip() - content = update.get("content") or {} - chunk_text = "" - if isinstance(content, dict): - chunk_text = str(content.get("text") or "") - if kind == "agent_message_chunk" and chunk_text and text_parts is not None: 
- text_parts.append(chunk_text) - elif kind == "agent_thought_chunk" and chunk_text and reasoning_parts is not None: - reasoning_parts.append(chunk_text) - return True - - if process.stdin is None: - return True - - message_id = msg.get("id") - params = msg.get("params") or {} - - if method == "session/request_permission": - response = _permission_denied(message_id) - elif method == "fs/read_text_file": - try: - path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) - block_error = get_read_block_error(str(path)) - if block_error: - raise PermissionError(block_error) - content = path.read_text() if path.exists() else "" - line = params.get("line") - limit = params.get("limit") - if isinstance(line, int) and line > 1: - lines = content.splitlines(keepends=True) - start = line - 1 - end = start + limit if isinstance(limit, int) and limit > 0 else None - content = "".join(lines[start:end]) - if content: - content = redact_sensitive_text(content) - response = { - "jsonrpc": "2.0", - "id": message_id, - "result": { - "content": content, - }, - } - except Exception as exc: - response = _jsonrpc_error(message_id, -32602, str(exc)) - elif method == "fs/write_text_file": - try: - path = _ensure_path_within_cwd(str(params.get("path") or ""), cwd) - if is_write_denied(str(path)): - raise PermissionError( - f"Write denied: '{path}' is a protected system/credential file." - ) - path.parent.mkdir(parents=True, exist_ok=True) - path.write_text(str(params.get("content") or "")) - response = { - "jsonrpc": "2.0", - "id": message_id, - "result": None, - } - except Exception as exc: - response = _jsonrpc_error(message_id, -32602, str(exc)) - else: - response = _jsonrpc_error( - message_id, - -32601, - f"ACP client method '{method}' is not supported by Hermes yet.", - ) - - process.stdin.write(json.dumps(response) + "\n") - process.stdin.flush() - return True +__all__ = ["CopilotACPClient"] diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 6ea1603565..6522a136df 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -313,6 +313,17 @@ _URL_TO_PROVIDER: Dict[str, str] = { "ollama.com": "ollama-cloud", } +# Auto-extend with hostnames derived from provider profiles. +# Any provider with a base_url not already in the map gets added automatically. +try: + from providers import list_providers as _list_providers + for _pp in _list_providers(): + _host = _pp.get_hostname() + if _host and _host not in _URL_TO_PROVIDER: + _URL_TO_PROVIDER[_host] = _pp.name +except Exception: + pass + def _infer_provider_from_url(base_url: str) -> Optional[str]: """Infer the models.dev provider name from a base URL. diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py index d1c8251ed2..b606da7fec 100644 --- a/agent/transports/__init__.py +++ b/agent/transports/__init__.py @@ -6,9 +6,16 @@ Usage: result = transport.normalize_response(raw_response) """ -from agent.transports.types import NormalizedResponse, ToolCall, Usage, build_tool_call, map_finish_reason # noqa: F401 +from agent.transports.types import ( + NormalizedResponse, + ToolCall, + Usage, + build_tool_call, + map_finish_reason, +) # noqa: F401 _REGISTRY: dict = {} +_discovered: bool = False def register_transport(api_mode: str, transport_cls: type) -> None: @@ -23,6 +30,9 @@ def get_transport(api_mode: str): This allows gradual migration — call sites can check for None and fall back to the legacy code path. 
""" + global _discovered + if not _discovered: + _discover_transports() cls = _REGISTRY.get(api_mode) if cls is None: # The registry can be partially populated when a specific transport @@ -38,6 +48,8 @@ def get_transport(api_mode: str): def _discover_transports() -> None: """Import all transport modules to trigger auto-registration.""" + global _discovered + _discovered = True try: import agent.transports.anthropic # noqa: F401 except ImportError: diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py index 34d5caa88a..c50557c061 100644 --- a/agent/transports/chat_completions.py +++ b/agent/transports/chat_completions.py @@ -10,7 +10,7 @@ reasoning configuration, temperature handling, and extra_body assembly. """ import copy -from typing import Any, Dict, List, Optional +from typing import Any from agent.moonshot_schema import is_moonshot_model, sanitize_moonshot_tools from agent.prompt_builder import DEVELOPER_ROLE_MODELS @@ -28,7 +28,9 @@ class ChatCompletionsTransport(ProviderTransport): def api_mode(self) -> str: return "chat_completions" - def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]: + def convert_messages( + self, messages: list[dict[str, Any]], **kwargs + ) -> list[dict[str, Any]]: """Messages are already in OpenAI format — sanitize Codex leaks only. Strips Codex Responses API fields (``codex_reasoning_items`` / @@ -45,7 +47,9 @@ class ChatCompletionsTransport(ProviderTransport): tool_calls = msg.get("tool_calls") if isinstance(tool_calls, list): for tc in tool_calls: - if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc): + if isinstance(tc, dict) and ( + "call_id" in tc or "response_item_id" in tc + ): needs_sanitize = True break if needs_sanitize: @@ -68,76 +72,52 @@ class ChatCompletionsTransport(ProviderTransport): tc.pop("response_item_id", None) return sanitized - def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + def convert_tools(self, tools: list[dict[str, Any]]) -> list[dict[str, Any]]: """Tools are already in OpenAI format — identity.""" return tools def build_kwargs( self, model: str, - messages: List[Dict[str, Any]], - tools: Optional[List[Dict[str, Any]]] = None, + messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None = None, **params, - ) -> Dict[str, Any]: + ) -> dict[str, Any]: """Build chat.completions.create() kwargs. - This is the most complex transport method — it handles ~16 providers - via params rather than subclasses. 
- - params: + params (all optional): timeout: float — API call timeout max_tokens: int | None — user-configured max tokens - ephemeral_max_output_tokens: int | None — one-shot override (error recovery) + ephemeral_max_output_tokens: int | None — one-shot override max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N} reasoning_config: dict | None request_overrides: dict | None session_id: str | None - qwen_session_metadata: dict | None — {sessionId, promptId} precomputed model_lower: str — lowercase model name for pattern matching - # Provider detection flags (all optional, default False) - is_openrouter: bool - is_nous: bool - is_qwen_portal: bool - is_github_models: bool - is_nvidia_nim: bool - is_kimi: bool - is_custom_provider: bool - ollama_num_ctx: int | None - # Provider routing - provider_preferences: dict | None - # Qwen-specific - qwen_prepare_fn: callable | None — runs AFTER codex sanitization - qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists - # Temperature - fixed_temperature: Any — from _fixed_temperature_for_model() - omit_temperature: bool - # Reasoning + # Provider profile path (all per-provider quirks live in providers/) + provider_profile: ProviderProfile | None — when present, delegates to + _build_kwargs_from_profile(); all flag params below are bypassed. + # Remaining flags — only used by the legacy fallback for unregistered + # providers (i.e. get_provider_profile() returned None). Known + # providers all go through provider_profile. + qwen_session_metadata: dict | None supports_reasoning: bool - github_reasoning_extra: dict | None - # Claude on OpenRouter/Nous max output anthropic_max_output: int | None - # Extra - extra_body_additions: dict | None — pre-built extra_body entries + extra_body_additions: dict | None """ # Codex sanitization: drop reasoning_items / call_id / response_item_id sanitized = self.convert_messages(messages) - # Qwen portal prep AFTER codex sanitization. If sanitize already - # deepcopied, reuse that copy via the in-place variant to avoid a - # second deepcopy. - is_qwen = params.get("is_qwen_portal", False) - if is_qwen: - qwen_prep = params.get("qwen_prepare_fn") - qwen_prep_inplace = params.get("qwen_prepare_inplace_fn") - if sanitized is messages: - if qwen_prep is not None: - sanitized = qwen_prep(sanitized) - else: - # Already deepcopied — transform in place - if qwen_prep_inplace is not None: - qwen_prep_inplace(sanitized) - elif qwen_prep is not None: - sanitized = qwen_prep(sanitized) + # ── Provider profile: single-path when present ────────────────── + _profile = params.get("provider_profile") + if _profile: + return self._build_kwargs_from_profile( + _profile, model, sanitized, tools, params + ) + + # ── Legacy fallback (unregistered / unknown provider) ─────────── + # Reached only when get_provider_profile() returned None. + # Known providers always go through the profile path above. 
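+        # Call sites select the profile path roughly as follows (a sketch;
+        # the exact plumbing presumably lives in run_agent.py, which this
+        # patch also touches but which is not shown in this hunk):
+        #
+        #     from providers import get_provider_profile
+        #     transport = get_transport("chat_completions")
+        #     api_kwargs = transport.build_kwargs(
+        #         model, messages, tools,
+        #         provider_profile=get_provider_profile(provider_id),
+        #     )
+        #
+        # When get_provider_profile() returns None, execution continues here.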
# Developer role swap for GPT-5/Codex models model_lower = params.get("model_lower", (model or "").lower()) @@ -150,7 +130,7 @@ class ChatCompletionsTransport(ProviderTransport): sanitized = list(sanitized) sanitized[0] = {**sanitized[0], "role": "developer"} - api_kwargs: Dict[str, Any] = { + api_kwargs: dict[str, Any] = { "model": model, "messages": sanitized, } @@ -159,19 +139,6 @@ class ChatCompletionsTransport(ProviderTransport): if timeout is not None: api_kwargs["timeout"] = timeout - # Temperature - fixed_temp = params.get("fixed_temperature") - omit_temp = params.get("omit_temperature", False) - if omit_temp: - api_kwargs.pop("temperature", None) - elif fixed_temp is not None: - api_kwargs["temperature"] = fixed_temp - - # Qwen metadata (caller precomputes {sessionId, promptId}) - qwen_meta = params.get("qwen_session_metadata") - if qwen_meta and is_qwen: - api_kwargs["metadata"] = qwen_meta - # Tools if tools: # Moonshot/Kimi uses a stricter flavored JSON Schema. Rewriting @@ -186,96 +153,24 @@ class ChatCompletionsTransport(ProviderTransport): ephemeral = params.get("ephemeral_max_output_tokens") max_tokens = params.get("max_tokens") anthropic_max_out = params.get("anthropic_max_output") - is_nvidia_nim = params.get("is_nvidia_nim", False) - is_kimi = params.get("is_kimi", False) - reasoning_config = params.get("reasoning_config") if ephemeral is not None and max_tokens_fn: api_kwargs.update(max_tokens_fn(ephemeral)) elif max_tokens is not None and max_tokens_fn: api_kwargs.update(max_tokens_fn(max_tokens)) - elif is_nvidia_nim and max_tokens_fn: - api_kwargs.update(max_tokens_fn(16384)) - elif is_qwen and max_tokens_fn: - api_kwargs.update(max_tokens_fn(65536)) - elif is_kimi and max_tokens_fn: - # Kimi/Moonshot: 32000 matches Kimi CLI's default - api_kwargs.update(max_tokens_fn(32000)) elif anthropic_max_out is not None: api_kwargs["max_tokens"] = anthropic_max_out - # Kimi: top-level reasoning_effort (unless thinking disabled) - if is_kimi: - _kimi_thinking_off = bool( - reasoning_config - and isinstance(reasoning_config, dict) - and reasoning_config.get("enabled") is False - ) - if not _kimi_thinking_off: - _kimi_effort = "medium" - if reasoning_config and isinstance(reasoning_config, dict): - _e = (reasoning_config.get("effort") or "").strip().lower() - if _e in ("low", "medium", "high"): - _kimi_effort = _e - api_kwargs["reasoning_effort"] = _kimi_effort - # extra_body assembly - extra_body: Dict[str, Any] = {} + extra_body: dict[str, Any] = {} - is_openrouter = params.get("is_openrouter", False) - is_nous = params.get("is_nous", False) - is_github_models = params.get("is_github_models", False) - - provider_prefs = params.get("provider_preferences") - if provider_prefs and is_openrouter: - extra_body["provider"] = provider_prefs - - # Kimi extra_body.thinking - if is_kimi: - _kimi_thinking_enabled = True - if reasoning_config and isinstance(reasoning_config, dict): - if reasoning_config.get("enabled") is False: - _kimi_thinking_enabled = False - extra_body["thinking"] = { - "type": "enabled" if _kimi_thinking_enabled else "disabled", - } - - # Reasoning + # Generic reasoning passthrough for unknown providers if params.get("supports_reasoning", False): - if is_github_models: - gh_reasoning = params.get("github_reasoning_extra") - if gh_reasoning is not None: - extra_body["reasoning"] = gh_reasoning + reasoning_config = params.get("reasoning_config") + if reasoning_config is not None: + extra_body["reasoning"] = dict(reasoning_config) else: - if reasoning_config is not None: 
- rc = dict(reasoning_config) - if is_nous and rc.get("enabled") is False: - pass # omit for Nous when disabled - else: - extra_body["reasoning"] = rc - else: - extra_body["reasoning"] = {"enabled": True, "effort": "medium"} - - if is_nous: - extra_body["tags"] = ["product=hermes-agent"] - - # Ollama num_ctx - ollama_ctx = params.get("ollama_num_ctx") - if ollama_ctx: - options = extra_body.get("options", {}) - options["num_ctx"] = ollama_ctx - extra_body["options"] = options - - # Ollama/custom think=false - if params.get("is_custom_provider", False): - if reasoning_config and isinstance(reasoning_config, dict): - _effort = (reasoning_config.get("effort") or "").strip().lower() - _enabled = reasoning_config.get("enabled", True) - if _effort == "none" or _enabled is False: - extra_body["think"] = False - - if is_qwen: - extra_body["vl_high_resolution_images"] = True + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} # Merge any pre-built extra_body additions additions = params.get("extra_body_additions") @@ -292,6 +187,117 @@ class ChatCompletionsTransport(ProviderTransport): return api_kwargs + def _build_kwargs_from_profile(self, profile, model, sanitized, tools, params): + """Build API kwargs using a ProviderProfile — single path, no legacy flags. + + This method replaces the entire flag-based kwargs assembly when a + provider_profile is passed. Every quirk comes from the profile object. + """ + from providers.base import OMIT_TEMPERATURE + + # Message preprocessing + sanitized = profile.prepare_messages(sanitized) + + # Developer role swap — model-name-based, applies to all providers + _model_lower = (model or "").lower() + if ( + sanitized + and isinstance(sanitized[0], dict) + and sanitized[0].get("role") == "system" + and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS) + ): + sanitized = list(sanitized) + sanitized[0] = {**sanitized[0], "role": "developer"} + + api_kwargs: dict[str, Any] = { + "model": model, + "messages": sanitized, + } + + # Temperature + if profile.fixed_temperature is OMIT_TEMPERATURE: + pass # Don't include temperature at all + elif profile.fixed_temperature is not None: + api_kwargs["temperature"] = profile.fixed_temperature + else: + # Use caller's temperature if provided + temp = params.get("temperature") + if temp is not None: + api_kwargs["temperature"] = temp + + # Timeout + timeout = params.get("timeout") + if timeout is not None: + api_kwargs["timeout"] = timeout + + # Tools — apply Moonshot/Kimi schema sanitization regardless of path + if tools: + if is_moonshot_model(model): + tools = sanitize_moonshot_tools(tools) + api_kwargs["tools"] = tools + + # max_tokens resolution — priority: ephemeral > user > profile default + max_tokens_fn = params.get("max_tokens_param_fn") + ephemeral = params.get("ephemeral_max_output_tokens") + user_max = params.get("max_tokens") + anthropic_max = params.get("anthropic_max_output") + + if ephemeral is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(ephemeral)) + elif user_max is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(user_max)) + elif profile.default_max_tokens and max_tokens_fn: + api_kwargs.update(max_tokens_fn(profile.default_max_tokens)) + elif anthropic_max is not None: + api_kwargs["max_tokens"] = anthropic_max + + # Provider-specific api_kwargs extras (reasoning_effort, metadata, etc.) 
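+        # build_api_kwargs_extras returns a pair: (extra_body entries,
+        # top-level kwargs). As a sketch, a Kimi-style profile would be
+        # expected to reproduce the legacy flags deleted above, roughly
+        #     ({"thinking": {"type": "enabled"}}, {"reasoning_effort": "medium"})
+        # when thinking is on. (Inferred from the removed flag path, not
+        # from providers/kimi.py itself, which is outside this excerpt.)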
+ reasoning_config = params.get("reasoning_config") + extra_body_from_profile, top_level_from_profile = ( + profile.build_api_kwargs_extras( + reasoning_config=reasoning_config, + supports_reasoning=params.get("supports_reasoning", False), + qwen_session_metadata=params.get("qwen_session_metadata"), + model=model, + ollama_num_ctx=params.get("ollama_num_ctx"), + ) + ) + api_kwargs.update(top_level_from_profile) + + # extra_body assembly + extra_body: dict[str, Any] = {} + + # Profile's extra_body (tags, provider prefs, vl_high_resolution, etc.) + profile_body = profile.build_extra_body( + session_id=params.get("session_id"), + provider_preferences=params.get("provider_preferences"), + ) + if profile_body: + extra_body.update(profile_body) + + # Profile's reasoning/thinking extra_body entries + if extra_body_from_profile: + extra_body.update(extra_body_from_profile) + + # Merge any pre-built extra_body additions from the caller + additions = params.get("extra_body_additions") + if additions: + extra_body.update(additions) + + # Request overrides (user config) + overrides = params.get("request_overrides") + if overrides: + for k, v in overrides.items(): + if k == "extra_body" and isinstance(v, dict): + extra_body.update(v) + else: + api_kwargs[k] = v + + if extra_body: + api_kwargs["extra_body"] = extra_body + + return api_kwargs + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: """Normalize OpenAI ChatCompletion to NormalizedResponse. @@ -313,7 +319,7 @@ class ChatCompletionsTransport(ProviderTransport): # Gemini 3 thinking models attach extra_content with # thought_signature — without replay on the next turn the API # rejects the request with 400. - tc_provider_data: Dict[str, Any] = {} + tc_provider_data: dict[str, Any] = {} extra = getattr(tc, "extra_content", None) if extra is None and hasattr(tc, "model_extra"): extra = (tc.model_extra or {}).get("extra_content") @@ -324,12 +330,14 @@ class ChatCompletionsTransport(ProviderTransport): except Exception: pass tc_provider_data["extra_content"] = extra - tool_calls.append(ToolCall( - id=tc.id, - name=tc.function.name, - arguments=tc.function.arguments, - provider_data=tc_provider_data or None, - )) + tool_calls.append( + ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + provider_data=tc_provider_data or None, + ) + ) usage = None if hasattr(response, "usage") and response.usage: @@ -347,7 +355,7 @@ class ChatCompletionsTransport(ProviderTransport): reasoning = getattr(msg, "reasoning", None) reasoning_content = getattr(msg, "reasoning_content", None) - provider_data: Dict[str, Any] = {} + provider_data: dict[str, Any] = {} if reasoning_content: provider_data["reasoning_content"] = reasoning_content rd = getattr(msg, "reasoning_details", None) @@ -373,7 +381,7 @@ class ChatCompletionsTransport(ProviderTransport): return False return True - def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + def extract_cache_stats(self, response: Any) -> dict[str, int] | None: """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details.""" usage = getattr(response, "usage", None) if usage is None: diff --git a/agent/transports/types.py b/agent/transports/types.py index 68a807b47c..f0da1eb6f8 100644 --- a/agent/transports/types.py +++ b/agent/transports/types.py @@ -12,7 +12,7 @@ from __future__ import annotations import json from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional +from typing import Any @dataclass @@ -32,10 +32,10 @@ 
class ToolCall: * Others: ``None`` """ - id: Optional[str] + id: str | None name: str arguments: str # JSON string - provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + provider_data: dict[str, Any] | None = field(default=None, repr=False) # ── Backward compatibility ────────────────────────────────── # The agent loop reads tc.function.name / tc.function.arguments @@ -47,17 +47,17 @@ class ToolCall: return "function" @property - def function(self) -> "ToolCall": + def function(self) -> ToolCall: """Return self so tc.function.name / tc.function.arguments work.""" return self @property - def call_id(self) -> Optional[str]: + def call_id(self) -> str | None: """Codex call_id from provider_data, accessed via getattr by _build_assistant_message.""" return (self.provider_data or {}).get("call_id") @property - def response_item_id(self) -> Optional[str]: + def response_item_id(self) -> str | None: """Codex response_item_id from provider_data.""" return (self.provider_data or {}).get("response_item_id") @@ -101,18 +101,18 @@ class NormalizedResponse: * Others: ``None`` """ - content: Optional[str] - tool_calls: Optional[List[ToolCall]] + content: str | None + tool_calls: list[ToolCall] | None finish_reason: str # "stop", "tool_calls", "length", "content_filter" - reasoning: Optional[str] = None - usage: Optional[Usage] = None - provider_data: Optional[Dict[str, Any]] = field(default=None, repr=False) + reasoning: str | None = None + usage: Usage | None = None + provider_data: dict[str, Any] | None = field(default=None, repr=False) # ── Backward compatibility ────────────────────────────────── # The shim _nr_to_assistant_message() mapped these from provider_data. # These properties let NormalizedResponse pass through directly. @property - def reasoning_content(self) -> Optional[str]: + def reasoning_content(self) -> str | None: pd = self.provider_data or {} return pd.get("reasoning_content") @@ -136,8 +136,9 @@ class NormalizedResponse: # Factory helpers # --------------------------------------------------------------------------- + def build_tool_call( - id: Optional[str], + id: str | None, name: str, arguments: Any, **provider_fields: Any, @@ -151,7 +152,7 @@ def build_tool_call( return ToolCall(id=id, name=name, arguments=args_str, provider_data=pd) -def map_finish_reason(reason: Optional[str], mapping: Dict[str, str]) -> str: +def map_finish_reason(reason: str | None, mapping: dict[str, str]) -> str: """Translate a provider-specific stop reason to the normalised set. Falls back to ``"stop"`` for unknown or ``None`` reasons. diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index fb6a79d1ff..4586bffe73 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -374,6 +374,37 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { ), } +# Auto-extend PROVIDER_REGISTRY with any api-key provider registered in +# providers/ that is not already declared above. New providers only need a +# providers/*.py file — no edits to this file required. +try: + from providers import list_providers as _list_providers_for_registry + for _pp in _list_providers_for_registry(): + if _pp.name in PROVIDER_REGISTRY: + continue + if _pp.auth_type != "api_key" or not _pp.env_vars: + continue + # Skip providers that need custom token resolution (copilot, kimi, zai) + # — those are already fully declared above. 
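+        # For the rest, env_vars are split below: names ending in _URL or
+        # _BASE_URL become the base-URL override, everything else is an API
+        # key, e.g. ("GMI_API_KEY", "GMI_BASE_URL") splits into api_key vars
+        # ("GMI_API_KEY",) and base_url var "GMI_BASE_URL".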
+ if _pp.name in {"copilot", "kimi-coding", "kimi-coding-cn", "zai"}: + continue + _api_key_vars = tuple(v for v in _pp.env_vars if not v.endswith("_BASE_URL") and not v.endswith("_URL")) + _base_url_var = next((v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), None) + PROVIDER_REGISTRY[_pp.name] = ProviderConfig( + id=_pp.name, + name=_pp.display_name or _pp.name, + auth_type="api_key", + inference_base_url=_pp.base_url, + api_key_env_vars=_api_key_vars or _pp.env_vars, + base_url_env_var=_base_url_var or "", + ) + # Also register aliases so resolve_provider() resolves them + for _alias in _pp.aliases: + if _alias not in PROVIDER_REGISTRY: + PROVIDER_REGISTRY[_alias] = PROVIDER_REGISTRY[_pp.name] +except Exception: + pass + # ============================================================================= # Anthropic Key Helper @@ -1150,6 +1181,17 @@ def resolve_provider( "vllm": "custom", "llamacpp": "custom", "llama.cpp": "custom", "llama-cpp": "custom", } + # Extend with aliases declared in providers/*.py that aren't already mapped. + # This keeps providers/ as the single source for new aliases while the + # hardcoded dict above remains authoritative for existing ones. + try: + from providers import list_providers as _lp + for _pp in _lp(): + for _alias in _pp.aliases: + if _alias not in _PROVIDER_ALIASES: + _PROVIDER_ALIASES[_alias] = _pp.name + except Exception: + pass normalized = _PROVIDER_ALIASES.get(normalized, normalized) if normalized == "openrouter": diff --git a/hermes_cli/config.py b/hermes_cli/config.py index bb11a5dff5..d690bdc523 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -4252,3 +4252,45 @@ def config_command(args): print(" hermes config path Show config file path") print(" hermes config env-path Show .env file path") sys.exit(1) + + +# ── Profile-driven env var injection ───────────────────────────────────────── +# Any provider registered in providers/ with auth_type="api_key" automatically +# gets its env_vars exposed in OPTIONAL_ENV_VARS without editing this file. +# Runs once at import time. + +_profile_env_vars_injected = False + + +def _inject_profile_env_vars() -> None: + """Populate OPTIONAL_ENV_VARS from provider profiles not already listed. + + Called once at module load time. Idempotent — repeated calls are no-ops. + """ + global _profile_env_vars_injected + if _profile_env_vars_injected: + return + _profile_env_vars_injected = True + try: + from providers import list_providers + for _pp in list_providers(): + if _pp.auth_type not in ("api_key",): + continue + for _var in _pp.env_vars: + if _var in OPTIONAL_ENV_VARS: + continue + _is_key = not _var.endswith("_BASE_URL") and not _var.endswith("_URL") + OPTIONAL_ENV_VARS[_var] = { + "description": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL override'}", + "prompt": f"{_pp.display_name or _pp.name} {'API key' if _is_key else 'base URL (leave empty for default)'}", + "url": _pp.signup_url or None, + "password": _is_key, + "category": "provider", + "advanced": True, + } + except Exception: + pass + + +# Eagerly inject so that OPTIONAL_ENV_VARS is fully populated at import time. 
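+# e.g. a profile with env_vars=("DEEPSEEK_API_KEY",) and display_name "DeepSeek"
+# would yield an entry prompting for "DeepSeek API key", password-masked, under
+# the "provider" category (skipped when the var is already listed).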
+_inject_profile_env_vars() diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index dc346ac9b2..b85c95d10c 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -164,6 +164,84 @@ def _check_gateway_service_linger(issues: list[str]) -> None: check_warn("Could not verify systemd linger", f"({linger_detail})") +_APIKEY_PROVIDERS_CACHE: list | None = None + + +def _build_apikey_providers_list() -> list: + """Build the API-key provider health-check list once and cache it. + + Tuple format: (name, env_vars, default_url, base_env, supports_models_endpoint) + Base list augmented with any ProviderProfile with auth_type="api_key" not + already present — adding providers/*.py is sufficient to get into doctor. + """ + _static = [ + ("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True), + ("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True), + ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True), + ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True), + ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True), + ("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True), + ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), + ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True), + ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True), + ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True), + # MiniMax: the /anthropic endpoint doesn't support /models; use the /v1 surface. + ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True), + ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True), + ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True), + ("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True), + ("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True), + # OpenCode Go has no shared /models endpoint; skip the health check. + ("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False), + ] + _known_names = {t[0] for t in _static} + # Also index by profile canonical name so profiles without display_name + # don't create duplicate entries for providers already in the static list. 
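+    # e.g. a bare profile named "gmi" with no display_name would not match the
+    # static "GMI Cloud" label, but the canonical-name check still filters it.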
+ _known_canonical: set[str] = set() + _name_to_canonical = { + "Z.AI / GLM": "zai", "Kimi / Moonshot": "kimi-coding", + "StepFun Step Plan": "stepfun", "Kimi / Moonshot (China)": "kimi-coding-cn", + "Arcee AI": "arcee", "GMI Cloud": "gmi", "DeepSeek": "deepseek", + "Hugging Face": "huggingface", "NVIDIA NIM": "nvidia", + "Alibaba/DashScope": "alibaba", "MiniMax": "minimax", + "MiniMax (China)": "minimax-cn", "Vercel AI Gateway": "ai-gateway", + "Kilo Code": "kilocode", "OpenCode Zen": "opencode-zen", + "OpenCode Go": "opencode-go", + } + for _label, _canonical in _name_to_canonical.items(): + _known_canonical.add(_canonical) + try: + from providers import list_providers + from providers.base import ProviderProfile as _PP + for _pp in list_providers(): + if not isinstance(_pp, _PP) or _pp.auth_type != "api_key" or not _pp.env_vars: + continue + _label = _pp.display_name or _pp.name + if _label in _known_names or _pp.name in _known_canonical: + continue + # Separate API-key vars from base-URL override vars — the health-check + # loop sends the first found value as Authorization: Bearer, so a URL + # string must never be picked. + _key_vars = tuple( + v for v in _pp.env_vars + if not v.endswith("_BASE_URL") and not v.endswith("_URL") + ) + _base_var = next( + (v for v in _pp.env_vars if v.endswith("_BASE_URL") or v.endswith("_URL")), + None, + ) + if not _key_vars: + continue + _models_url = ( + (_pp.models_url or (_pp.base_url.rstrip("/") + "/models")) + if _pp.base_url else None + ) + _static.append((_label, _key_vars, _models_url, _base_var, True)) + except Exception: + pass + return _static + + def run_doctor(args): """Run diagnostic checks.""" should_fix = getattr(args, 'fix', False) @@ -931,27 +1009,11 @@ def run_doctor(args): # -- API-key providers -- # Tuple: (name, env_vars, default_url, base_env, supports_models_endpoint) - # If supports_models_endpoint is False, we skip the health check and just show "configured" - _apikey_providers = [ - ("Z.AI / GLM", ("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), "https://api.z.ai/api/paas/v4/models", "GLM_BASE_URL", True), - ("Kimi / Moonshot", ("KIMI_API_KEY",), "https://api.moonshot.ai/v1/models", "KIMI_BASE_URL", True), - ("StepFun Step Plan", ("STEPFUN_API_KEY",), "https://api.stepfun.ai/step_plan/v1/models", "STEPFUN_BASE_URL", True), - ("Kimi / Moonshot (China)", ("KIMI_CN_API_KEY",), "https://api.moonshot.cn/v1/models", None, True), - ("Arcee AI", ("ARCEEAI_API_KEY",), "https://api.arcee.ai/api/v1/models", "ARCEE_BASE_URL", True), - ("GMI Cloud", ("GMI_API_KEY",), "https://api.gmi-serving.com/v1/models", "GMI_BASE_URL", True), - ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), - ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True), - ("NVIDIA NIM", ("NVIDIA_API_KEY",), "https://integrate.api.nvidia.com/v1/models", "NVIDIA_BASE_URL", True), - ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True), - # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does. 
- ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True), - ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True), - ("Vercel AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True), - ("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True), - ("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True), - # OpenCode Go has no shared /models endpoint; skip the health check. - ("OpenCode Go", ("OPENCODE_GO_API_KEY",), None, "OPENCODE_GO_BASE_URL", False), - ] + # Cached at module level after first build — profiles auto-extend it. + global _APIKEY_PROVIDERS_CACHE + if _APIKEY_PROVIDERS_CACHE is None: + _APIKEY_PROVIDERS_CACHE = _build_apikey_providers_list() + _apikey_providers = _APIKEY_PROVIDERS_CACHE for _pname, _env_vars, _default_url, _base_env, _supports_health_check in _apikey_providers: _key = "" for _ev in _env_vars: diff --git a/hermes_cli/main.py b/hermes_cli/main.py index 1a01e67c46..a33f3e5463 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -1528,6 +1528,21 @@ def cmd_model(args): select_provider_and_model(args=args) +def _is_profile_api_key_provider(provider_id: str) -> bool: + """Return True when provider_id maps to a profile with auth_type='api_key'. + + Used as a catch-all in select_provider_and_model() so that new providers + declared in providers/*.py automatically dispatch to _model_flow_api_key_provider + without requiring an explicit elif branch here. + """ + try: + from providers import get_provider_profile + _p = get_provider_profile(provider_id) + return _p is not None and _p.auth_type == "api_key" + except Exception: + return False + + def select_provider_and_model(args=None): """Core provider selection + model picking logic. 
@@ -1820,7 +1835,7 @@ def select_provider_and_model(args=None):
             "gmi",
             "nvidia",
             "ollama-cloud",
-        ):
+        ) or _is_profile_api_key_provider(selected_provider):
             _model_flow_api_key_provider(config, selected_provider, current_model)
 
     # ── Post-switch cleanup: clear stale OPENAI_BASE_URL ──────────────
@@ -7618,6 +7633,22 @@ def cmd_logs(args):
     )
 
 
+def _build_provider_choices() -> list[str]:
+    """Build the --provider choices list from CANONICAL_PROVIDERS + 'auto'."""
+    try:
+        from hermes_cli.models import CANONICAL_PROVIDERS as _cp
+        return ["auto"] + [p.slug for p in _cp]
+    except Exception:
+        # Fallback: static list guarantees the CLI always works
+        return [
+            "auto", "openrouter", "nous", "openai-codex", "copilot-acp", "copilot",
+            "anthropic", "gemini", "google-gemini-cli", "xai", "bedrock", "azure-foundry",
+            "ollama-cloud", "huggingface", "zai", "kimi-coding", "kimi-coding-cn",
+            "stepfun", "minimax", "minimax-cn", "kilocode", "xiaomi", "arcee",
+            "gmi", "nvidia", "deepseek", "alibaba", "qwen-oauth", "opencode-zen", "opencode-go",
+        ]
+
+
 def main():
     """Main entry point for hermes CLI."""
     parser = argparse.ArgumentParser(
@@ -7811,30 +7842,7 @@ For more help on a command:
     )
     chat_parser.add_argument(
         "--provider",
-        choices=[
-            "auto",
-            "openrouter",
-            "nous",
-            "openai-codex",
-            "copilot-acp",
-            "copilot",
-            "anthropic",
-            "gemini",
-            "xai",
-            "ollama-cloud",
-            "huggingface",
-            "zai",
-            "kimi-coding",
-            "kimi-coding-cn",
-            "stepfun",
-            "minimax",
-            "minimax-cn",
-            "kilocode",
-            "xiaomi",
-            "arcee",
-            "gmi",
-            "nvidia",
-        ],
+        choices=_build_provider_choices(),
        default=None,
        help="Inference provider (default: auto)",
    )
diff --git a/hermes_cli/models.py b/hermes_cli/models.py
index 28ca6d7dea..96d67b2a24 100644
--- a/hermes_cli/models.py
+++ b/hermes_cli/models.py
@@ -750,6 +750,25 @@ CANONICAL_PROVIDERS: list[ProviderEntry] = [
     ProviderEntry("azure-foundry", "Azure Foundry", "Azure Foundry (OpenAI-style or Anthropic-style endpoint — your Azure AI deployment)"),
 ]
 
+# Auto-extend CANONICAL_PROVIDERS with any provider registered in providers/
+# that is not already in the list above. Adding providers/*.py is sufficient
+# to expose a new provider in the model picker, /model, and all downstream
+# consumers — no edits to this file needed.
+_canonical_slugs = {p.slug for p in CANONICAL_PROVIDERS}
+try:
+    from providers import list_providers as _list_providers_for_canonical
+    for _pp in _list_providers_for_canonical():
+        if _pp.name in _canonical_slugs:
+            continue
+        if _pp.auth_type in ("oauth_device_code", "oauth_external", "external_process", "aws_sdk", "copilot"):
+            continue  # non-api-key flows need bespoke picker UX; skip auto-inject
+        _label = _pp.display_name or _pp.name
+        _desc = _pp.description or f"{_label} (direct API)"
+        CANONICAL_PROVIDERS.append(ProviderEntry(_pp.name, _label, _desc))
+        _canonical_slugs.add(_pp.name)
+except Exception:
+    pass
+
 # Derived dicts — used throughout the codebase
 _PROVIDER_LABELS = {p.slug: p.label for p in CANONICAL_PROVIDERS}
 _PROVIDER_LABELS["custom"] = "Custom endpoint"  # special case: not a named provider
@@ -1884,6 +1903,34 @@ def provider_model_ids(provider: Optional[str], *, force_refresh: bool = False)
         live = fetch_api_models(api_key, base_url)
         if live:
             return live
+
+    # ── Profile-based generic live fetch (all simple api-key providers) ──
+    # Handles any provider registered in providers/ with auth_type="api_key".
+    # Replaces per-provider copy-paste blocks (stepfun, gmi, zai, etc.).
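+    # Attempt order: resolve credentials → profile.fetch_models() live fetch
+    # → profile.fallback_models → curated static list below.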
+ try: + from providers import get_provider_profile + from hermes_cli.auth import resolve_api_key_provider_credentials + + _p = get_provider_profile(normalized) + if _p and _p.auth_type == "api_key" and _p.base_url: + try: + creds = resolve_api_key_provider_credentials(normalized) + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip() + except Exception: + api_key, base_url = "", _p.base_url + if not base_url: + base_url = _p.base_url + if api_key: + live = _p.fetch_models(api_key=api_key) + if live: + return live + # Use profile's fallback_models if defined + if _p.fallback_models: + return list(_p.fallback_models) + except Exception: + pass + curated_static = list(_PROVIDER_MODELS.get(normalized, [])) if normalized in _MODELS_DEV_PREFERRED: return _merge_with_models_dev(normalized, curated_static) diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 1fe5acc2b6..54538c22e8 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -214,10 +214,6 @@ def _resolve_runtime_from_pool_entry( base_url = cfg_base_url or base_url or "https://api.anthropic.com" elif provider == "openrouter": base_url = base_url or OPENROUTER_BASE_URL - elif provider == "xai": - api_mode = "codex_responses" - elif provider == "nous": - api_mode = "chat_completions" elif provider == "copilot": api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", "")) base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url @@ -249,6 +245,14 @@ def _resolve_runtime_from_pool_entry( base_url = re.sub(r"/v1/?$", "", base_url) else: configured_provider = str(model_cfg.get("provider") or "").strip().lower() + # Use profile api_mode for all other known providers + try: + from providers import get_provider_profile + _p = get_provider_profile(provider) + if _p and _p.api_mode: + api_mode = _p.api_mode + except Exception: + pass # Honour model.base_url from config.yaml when the configured provider # matches this provider — same pattern as the Anthropic branch above. # Only override when the pool entry has no explicit base_url (i.e. it @@ -266,12 +270,21 @@ def _resolve_runtime_from_pool_entry( from hermes_cli.models import opencode_model_api_mode api_mode = opencode_model_api_mode(provider, effective_model) else: - # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, - # Kimi /coding, api.openai.com → codex_responses, api.x.ai → - # codex_responses). - detected = _detect_api_mode_for_url(base_url) - if detected: - api_mode = detected + # Try profile api_mode first, then auto-detect from URL + try: + from providers import get_provider_profile + _p = get_provider_profile(provider) + if _p and _p.api_mode: + api_mode = _p.api_mode + except Exception: + pass + if api_mode == "chat_completions": + # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, + # Kimi /coding, api.openai.com → codex_responses, api.x.ai → + # codex_responses). + detected = _detect_api_mode_for_url(base_url) + if detected: + api_mode = detected # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the # Anthropic SDK prepends its own /v1/messages to the base_url. Strip the diff --git a/providers/README.md b/providers/README.md new file mode 100644 index 0000000000..786bc3c2e9 --- /dev/null +++ b/providers/README.md @@ -0,0 +1,307 @@ +# providers/ + +Single source of truth for every inference provider Hermes knows about. + +Each provider is declared once here as a `ProviderProfile`. 
Every other layer — auth resolution, transport kwargs,
+model listing, runtime routing — reads from these profiles instead of
+maintaining its own parallel data.
+
+---
+
+## Directory layout
+
+```
+providers/
+├── base.py            ProviderProfile dataclass + OMIT_TEMPERATURE sentinel
+├── __init__.py        Registry: register_provider(), get_provider_profile()
+├── README.md          This file
+│
+├── # Simple providers — just identity + auth + endpoint
+├── alibaba.py         Alibaba Cloud DashScope
+├── arcee.py           Arcee AI
+├── bedrock.py         AWS Bedrock (api_mode=bedrock_converse)
+├── deepseek.py        DeepSeek
+├── gmi.py             GMI Cloud
+├── huggingface.py     Hugging Face Inference API
+├── kilocode.py        Kilo Code
+├── minimax.py         MiniMax (international + CN)
+├── nvidia.py          NVIDIA NIM (default_max_tokens=16384)
+├── ollama_cloud.py    Ollama Cloud
+├── stepfun.py         StepFun
+├── xai.py             xAI Grok (api_mode=codex_responses)
+├── xiaomi.py          Xiaomi MiMo
+├── zai.py             Z.AI / GLM
+│
+├── # Medium — one or two quirks
+├── anthropic.py       Native Anthropic (x-api-key header, api_mode=anthropic_messages)
+├── copilot.py         GitHub Copilot (auth_type=copilot, reasoning per model)
+├── copilot_acp.py     Copilot ACP subprocess (external-process dispatch)
+├── custom.py          Custom/Ollama local (think=false, num_ctx)
+├── gemini.py          Google Gemini AI Studio + Cloud Code OAuth
+├── kimi.py            Kimi Coding (OMIT_TEMPERATURE, thinking, dual endpoint)
+├── openai_codex.py    OpenAI Codex OAuth (api_mode=codex_responses)
+├── opencode.py        OpenCode Zen + Go (per-model api_mode routing)
+│
+├── # Complex — subclasses with multiple overrides
+├── nous.py            Nous Portal (tags, attribution, reasoning omit-when-disabled)
+├── openrouter.py      OpenRouter (provider preferences, public model fetch)
+├── qwen.py            Qwen OAuth (message normalization, cache_control, vl_hires)
+└── vercel.py          Vercel AI Gateway (attribution headers, reasoning passthrough)
+```
+
+---
+
+## ProviderProfile fields
+
+```python
+@dataclass
+class ProviderProfile:
+    # Identity
+    name: str       # canonical ID — auto-registered as PROVIDER_REGISTRY key for new api-key providers
+    api_mode: str   # "chat_completions" | "anthropic_messages" |
+                    #   "codex_responses" | "bedrock_converse" | "copilot_acp"
+    aliases: tuple  # alternate names resolved by get_provider_profile()
+
+    # Auth & endpoints
+    env_vars: tuple  # env var names holding the API key, in priority order
+    base_url: str    # default inference endpoint
+    models_url: str  # explicit models endpoint; falls back to {base_url}/models
+                     #   set when the models catalog lives at a different URL
+                     #   (e.g. OpenRouter: public /api/v1/models vs /api/v1 inference)
+    auth_type: str   # "api_key" | "oauth_device_code" | "oauth_external" |
+                     #   "copilot" | "aws_sdk" | "external_process"
+
+    # Client-level quirks
+    default_headers: dict  # extra HTTP headers sent on every request
+
+    # Request-level quirks
+    fixed_temperature: Any        # None = use caller's default; OMIT_TEMPERATURE = don't send
+    default_max_tokens: int|None  # inject max_tokens when caller omits it
+    default_aux_model: str        # cheap model for auxiliary tasks (compression, vision, etc.)
+ # empty string = use main model (default) +``` + +--- + +## Hooks (override in a subclass) + +| Method | When to override | +|--------|-----------------| +| `prepare_messages(messages)` | Provider needs message pre-processing (Qwen: string → list-of-parts, cache_control) | +| `build_extra_body(*, session_id, **ctx)` | Provider-specific `extra_body` fields (Nous: tags, OpenRouter: provider preferences) | +| `build_api_kwargs_extras(*, reasoning_config, **ctx)` | Returns `(extra_body_additions, top_level_kwargs)` — use when some fields go to `extra_body` and some go top-level (Kimi: `reasoning_effort` top-level; OpenRouter: `reasoning` in extra_body) | +| `fetch_models(*, api_key, timeout)` | Custom model listing (Anthropic: x-api-key header; OpenRouter: public endpoint, no auth; Bedrock/copilot-acp: return None) | + +All hooks have safe defaults — only override what differs from the base. + +--- + +## How to add a new provider + +### 1. Simple (standard OpenAI-compatible endpoint) + +```python +# providers/myprovider.py +from providers import register_provider +from providers.base import ProviderProfile + +myprovider = ProviderProfile( + name="myprovider", # must match id in hermes_cli/auth.py PROVIDER_REGISTRY + aliases=("my-provider", "myp"), + api_mode="chat_completions", + env_vars=("MYPROVIDER_API_KEY",), + base_url="https://api.myprovider.com/v1", + auth_type="api_key", +) + +register_provider(myprovider) +``` + +The default `fetch_models()` will call `GET https://api.myprovider.com/v1/models` +with Bearer auth automatically. No override needed for standard `/v1/models`. + +### 2. With quirks (subclass) + +```python +# providers/myprovider.py +from typing import Any +from providers import register_provider +from providers.base import ProviderProfile + + +class MyProviderProfile(ProviderProfile): + """My provider — custom reasoning header.""" + + def build_api_kwargs_extras( + self, + *, + reasoning_config: dict | None = None, + **ctx: Any, + ) -> tuple[dict[str, Any], dict[str, Any]]: + extra_body: dict[str, Any] = {} + if reasoning_config: + extra_body["my_reasoning"] = reasoning_config.get("effort", "medium") + return extra_body, {} + + def fetch_models( + self, + *, + api_key: str | None = None, + timeout: float = 8.0, + ) -> list[str] | None: + # Override only if your endpoint differs from standard /v1/models + return super().fetch_models(api_key=api_key, timeout=timeout) + + +myprovider = MyProviderProfile( + name="myprovider", + aliases=("myp",), + env_vars=("MYPROVIDER_API_KEY",), + base_url="https://api.myprovider.com/v1", +) + +register_provider(myprovider) +``` + +### 3. Wire it up + +After creating the file, add `name` to the `_PROFILE_ACTIVE_PROVIDERS` set in +`run_agent.py` once you've verified parity against the legacy flag path. Start +with a simple provider (no message prep, no reasoning quirks) and work up. + +--- + +## fetch_models contract + +```python +def fetch_models( + self, + *, + api_key: str | None = None, + timeout: float = 8.0, +) -> list[str] | None: + ... +``` + +- Returns `list[str]`: model IDs from the provider's live endpoint. +- Returns `None`: provider doesn't support REST model listing (Bedrock, copilot-acp), + or the request failed. Callers **must** fall back to `_PROVIDER_MODELS` on `None`. +- Never raises — swallow exceptions and return `None`. +- Default implementation: `GET {base_url}/models` with Bearer auth. Works for any + standard OpenAI-compatible provider. 
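+
+A minimal caller sketch honouring this contract; `curated` is a hypothetical
+stand-in for the provider's static `_PROVIDER_MODELS` entry:
+
+```python
+from providers import get_provider_profile
+
+profile = get_provider_profile("deepseek")
+curated = ["deepseek-chat", "deepseek-reasoner"]  # static fallback list
+models = profile.fetch_models(api_key="sk-...", timeout=5.0)
+if models is None:
+    models = curated  # None means the fetch failed: never skip the fallback
+```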
+ +**Override when:** +- Auth header is not `Bearer` (Anthropic: `x-api-key`) +- Endpoint path differs from `/models` AND you can't just set `models_url` (OpenRouter: public endpoint, pass `api_key=None` explicitly) +- Response format differs (extra wrapping, non-standard `id` field) +- Provider has no REST endpoint (Bedrock, copilot-acp → return `None`) +- Filtering needed post-fetch (only tool-capable models, etc.) + +Use `models_url` instead of overriding when the only difference is the URL: + +```python +# No subclass needed — just set models_url +myprovider = ProviderProfile( + name="myprovider", + base_url="https://api.myprovider.com/v1", + models_url="https://catalog.myprovider.com/models", # different host +) +``` + +--- + +## Debugging + +### Check if a provider resolves + +```python +from providers import get_provider_profile + +p = get_provider_profile("myprovider") +print(p) # ProviderProfile(name='myprovider', ...) +print(p.base_url) +print(p.api_mode) +``` + +### Check all registered providers + +```python +from providers import _REGISTRY +print(list(_REGISTRY.keys())) +``` + +### Test live model fetch + +```python +import os +from providers import get_provider_profile + +p = get_provider_profile("myprovider") +key = os.getenv("MYPROVIDER_API_KEY") +models = p.fetch_models(api_key=key, timeout=5.0) +print(models) # list of model IDs, or None on failure +``` + +### Test alias resolution + +```python +from providers import get_provider_profile + +# All of these should return the same profile +assert get_provider_profile("openrouter").name == "openrouter" +assert get_provider_profile("or").name == "openrouter" +``` + +### Run the provider test suite + +```bash +# From the repo root +source venv/bin/activate +python -m pytest tests/providers/ -v +``` + +### Check ruff + ty compliance + +```bash +source venv/bin/activate +ruff format providers/*.py +ruff check providers/*.py --select UP,E,F,I,W +ty check providers/*.py +``` + +--- + +## Common mistakes + +**Wrong `name`** — must be the same string that appears as the key in +`hermes_cli/auth.py` `PROVIDER_REGISTRY`. New api-key providers auto-register +into `PROVIDER_REGISTRY` from the profile, so the name IS the key. For providers +with a pre-existing `PROVIDER_REGISTRY` entry, use the exact `id` field value. + +**Wrong `env_vars`** — separate API-key vars from base-URL override vars in the +tuple. Env vars that end with `_BASE_URL` or `_URL` are treated as URL overrides; +everything else is treated as an API key. Getting this wrong causes the doctor +health check to send a URL string as a Bearer token. + +**Wrong `base_url`** — several providers have non-obvious paths: +`stepfun: /step_plan/v1`, `opencode-go: /zen/go/v1`. The profile's `base_url` +is also used as the `inference_base_url` when auto-registering into `PROVIDER_REGISTRY` +for new providers, so it must be correct for auth resolution to work. + +**Skipping `api_mode`** — defaults to `chat_completions`. Providers that use +`anthropic_messages`, `codex_responses`, `bedrock_converse`, or `copilot_acp` +must set it explicitly. + +**Forgetting `register_provider()`** — auto-discovery runs `pkgutil.iter_modules` +over the package and imports each module, but only if `register_provider()` is +called at module level. Without it the profile is never in `_REGISTRY`. + +**`fetch_models` returning the wrong shape** — must return `list[str]` (plain +model IDs), not `list[tuple]` or `list[dict]`. Callers expect plain strings. 
+
+**Wrong `build_api_kwargs_extras` return shape** — must return a 2-tuple
+`(extra_body_dict, top_level_dict)`. Returning a single dict causes a
+`ValueError: not enough values to unpack` in the transport, and returning the
+two dicts in the wrong order silently sends fields to the wrong place.
diff --git a/providers/__init__.py b/providers/__init__.py
new file mode 100644
index 0000000000..9c80b449a9
--- /dev/null
+++ b/providers/__init__.py
@@ -0,0 +1,76 @@
+"""Provider module registry.
+
+Auto-discovers ProviderProfile instances from providers/*.py modules.
+Each module must call register_provider() at import time.
+
+Usage:
+    from providers import get_provider_profile
+    profile = get_provider_profile("nvidia")  # returns ProviderProfile or None
+    profile = get_provider_profile("kimi")    # checks name + aliases
+"""
+
+from __future__ import annotations
+
+from providers.base import OMIT_TEMPERATURE, ProviderProfile  # noqa: F401
+
+_REGISTRY: dict[str, ProviderProfile] = {}
+_ALIASES: dict[str, str] = {}
+_discovered = False
+
+
+def register_provider(profile: ProviderProfile) -> None:
+    """Register a provider profile by name and aliases."""
+    _REGISTRY[profile.name] = profile
+    for alias in profile.aliases:
+        _ALIASES[alias] = profile.name
+
+
+def get_provider_profile(name: str) -> ProviderProfile | None:
+    """Look up a provider profile by name or alias.
+
+    Returns None if the provider has no profile (falls back to generic).
+    """
+    if not _discovered:
+        _discover_providers()
+    canonical = _ALIASES.get(name, name)
+    return _REGISTRY.get(canonical)
+
+
+def list_providers() -> list[ProviderProfile]:
+    """Return all registered provider profiles (one per canonical name)."""
+    if not _discovered:
+        _discover_providers()
+    # Deduplicate defensively: _REGISTRY is keyed by canonical name, but guard
+    # against the same profile object being registered under two names.
+    seen: set[int] = set()
+    result: list[ProviderProfile] = []
+    for profile in _REGISTRY.values():
+        pid = id(profile)
+        if pid not in seen:
+            seen.add(pid)
+            result.append(profile)
+    return result
+
+
+def _discover_providers() -> None:
+    """Import all provider modules to trigger registration."""
+    global _discovered
+    if _discovered:
+        return
+    _discovered = True
+
+    import importlib
+    import pkgutil
+
+    import providers as _pkg
+
+    for _importer, modname, _ispkg in pkgutil.iter_modules(_pkg.__path__):
+        if modname.startswith("_") or modname == "base":
+            continue
+        try:
+            importlib.import_module(f"providers.{modname}")
+        except ImportError as e:
+            import logging
+
+            logging.getLogger(__name__).warning(
+                "Failed to import provider module %s: %s", modname, e
+            )
diff --git a/providers/alibaba.py b/providers/alibaba.py
new file mode 100644
index 0000000000..5772bc87e6
--- /dev/null
+++ b/providers/alibaba.py
@@ -0,0 +1,13 @@
+"""Alibaba Cloud DashScope provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+alibaba = ProviderProfile(
+    name="alibaba",
+    aliases=("dashscope", "alibaba-cloud", "qwen-dashscope"),
+    env_vars=("DASHSCOPE_API_KEY",),
+    base_url="https://dashscope-intl.aliyuncs.com/compatible-mode/v1",
+)
+
+register_provider(alibaba)
diff --git a/providers/anthropic.py b/providers/anthropic.py
new file mode 100644
index 0000000000..f1f45eb82c
--- /dev/null
+++ b/providers/anthropic.py
@@ -0,0 +1,52 @@
+"""Native Anthropic provider profile."""
+
+import json
+import logging
+import urllib.request
+
+from providers import 
register_provider +from providers.base import ProviderProfile + +logger = logging.getLogger(__name__) + + +class AnthropicProfile(ProviderProfile): + """Native Anthropic — uses x-api-key header, not Bearer.""" + + def fetch_models( + self, + *, + api_key: str | None = None, + timeout: float = 8.0, + ) -> list[str] | None: + """Anthropic uses x-api-key header and anthropic-version.""" + if not api_key: + return None + try: + req = urllib.request.Request("https://api.anthropic.com/v1/models") + req.add_header("x-api-key", api_key) + req.add_header("anthropic-version", "2023-06-01") + req.add_header("Accept", "application/json") + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + return [ + m["id"] + for m in data.get("data", []) + if isinstance(m, dict) and "id" in m + ] + except Exception as exc: + logger.debug("fetch_models(anthropic): %s", exc) + return None + + +anthropic = AnthropicProfile( + name="anthropic", + aliases=("claude", "claude-oauth", "claude-code"), + api_mode="anthropic_messages", + env_vars=("ANTHROPIC_API_KEY", "ANTHROPIC_TOKEN", "CLAUDE_CODE_OAUTH_TOKEN"), + base_url="https://api.anthropic.com", + auth_type="api_key", + default_aux_model="claude-haiku-4-5-20251001", +) + +register_provider(anthropic) diff --git a/providers/arcee.py b/providers/arcee.py new file mode 100644 index 0000000000..46afb6e16e --- /dev/null +++ b/providers/arcee.py @@ -0,0 +1,13 @@ +"""Arcee AI provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +arcee = ProviderProfile( + name="arcee", + aliases=("arcee-ai", "arceeai"), + env_vars=("ARCEEAI_API_KEY",), + base_url="https://api.arcee.ai/api/v1", +) + +register_provider(arcee) diff --git a/providers/base.py b/providers/base.py new file mode 100644 index 0000000000..2c685f9b81 --- /dev/null +++ b/providers/base.py @@ -0,0 +1,165 @@ +"""Provider profile base class. + +A ProviderProfile declares everything about an inference provider in one place: +auth, endpoints, client quirks, request-time quirks. The transport reads this +instead of receiving 20+ boolean flags. + +Provider profiles are DECLARATIVE — they describe the provider's behavior. +They do NOT own client construction, credential rotation, or streaming. +Those stay on AIAgent. +""" + +from __future__ import annotations + +import logging +from dataclasses import dataclass, field +from typing import Any + +logger = logging.getLogger(__name__) + +# Sentinel for "omit temperature entirely" (Kimi: server manages it) +OMIT_TEMPERATURE = object() + + +@dataclass +class ProviderProfile: + """Base provider profile — subclass or instantiate with overrides.""" + + # ── Identity ───────────────────────────────────────────── + name: str + api_mode: str = "chat_completions" + aliases: tuple = () + + # ── Human-readable metadata ─────────────────────────────── + display_name: str = "" # e.g. "GMI Cloud" — shown in picker/labels + description: str = "" # e.g. "GMI Cloud (multi-model direct API)" — picker subtitle + signup_url: str = "" # e.g. 
"https://www.gmicloud.ai/" — shown during setup + + # ── Auth & endpoints ───────────────────────────────────── + env_vars: tuple = () + base_url: str = "" + models_url: str = "" # explicit models endpoint; falls back to {base_url}/models + auth_type: str = "api_key" # api_key|oauth_device_code|oauth_external|copilot|aws_sdk + + # ── Model catalog ───────────────────────────────────────── + # fallback_models: curated list shown in /model picker when live fetch fails. + # Only agentic models that support tool calling should appear here. + fallback_models: tuple = () + + # hostname: base hostname for URL→provider reverse-mapping in model_metadata.py + # e.g. "api.gmi-serving.com". Derived from base_url when empty. + hostname: str = "" + + # ── Client-level quirks (set once at client construction) ─ + default_headers: dict[str, str] = field(default_factory=dict) + + # ── Request-level quirks ───────────────────────────────── + # Temperature: None = use caller's default, OMIT_TEMPERATURE = don't send + fixed_temperature: Any = None + default_max_tokens: int | None = None + default_aux_model: str = ( + "" # cheap model for auxiliary tasks (compression, vision, etc.) + ) + # empty = use main model + + # ── Hooks (override in subclass for complex providers) ─── + + def get_hostname(self) -> str: + """Return the provider's base hostname for URL-based detection. + + Uses self.hostname if set explicitly, otherwise derives it from base_url. + e.g. 'https://api.gmi-serving.com/v1' → 'api.gmi-serving.com' + """ + if self.hostname: + return self.hostname + if self.base_url: + from urllib.parse import urlparse + return urlparse(self.base_url).hostname or "" + return "" + + def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Provider-specific message preprocessing. + + Called AFTER codex field sanitization, BEFORE developer role swap. + Default: pass-through. + """ + return messages + + def build_extra_body( + self, *, session_id: str | None = None, **context: Any + ) -> dict[str, Any]: + """Provider-specific extra_body fields. + + Merged into the API kwargs extra_body. Default: empty dict. + """ + return {} + + def build_api_kwargs_extras( + self, + *, + reasoning_config: dict | None = None, + **context: Any, + ) -> tuple[dict[str, Any], dict[str, Any]]: + """Provider-specific kwargs split between extra_body and top-level api_kwargs. + + Returns (extra_body_additions, top_level_kwargs). + The transport merges extra_body_additions into extra_body, and + top_level_kwargs directly into api_kwargs. + + This split exists because some providers put reasoning config in + extra_body (OpenRouter: extra_body.reasoning) while others put it + as top-level api_kwargs (Kimi: api_kwargs.reasoning_effort). + + Default: ({}, {}). + """ + return {}, {} + + def fetch_models( + self, + *, + api_key: str | None = None, + timeout: float = 8.0, + ) -> list[str] | None: + """Fetch the live model list from the provider's models endpoint. + + Returns a list of model ID strings, or None if the fetch failed or + the provider does not support live model listing. + + Resolution order for the endpoint URL: + 1. self.models_url (explicit override — use when the models + endpoint differs from the inference base URL, e.g. OpenRouter + exposes a public catalog at /api/v1/models while inference is + at /api/v1) + 2. self.base_url + "/models" (standard OpenAI-compat fallback) + + The default implementation sends Bearer auth when api_key is given + and forwards self.default_headers. 
Override to customise auth, path, + response shape, or to return None for providers with no REST catalog. + + Callers must always fall back to the static _PROVIDER_MODELS list + when this returns None. + """ + url = (self.models_url or "").strip() + if not url: + if not self.base_url: + return None + url = self.base_url.rstrip("/") + "/models" + + import json + import urllib.request + + req = urllib.request.Request(url) + if api_key: + req.add_header("Authorization", f"Bearer {api_key}") + req.add_header("Accept", "application/json") + for k, v in self.default_headers.items(): + req.add_header(k, v) + + try: + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + items = data if isinstance(data, list) else data.get("data", []) + return [m["id"] for m in items if isinstance(m, dict) and "id" in m] + except Exception as exc: + logger.debug("fetch_models(%s): %s", self.name, exc) + return None diff --git a/providers/bedrock.py b/providers/bedrock.py new file mode 100644 index 0000000000..6fdbbe834d --- /dev/null +++ b/providers/bedrock.py @@ -0,0 +1,29 @@ +"""AWS Bedrock provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + + +class BedrockProfile(ProviderProfile): + """AWS Bedrock — no REST /v1/models endpoint; uses AWS SDK.""" + + def fetch_models( + self, + *, + api_key: str | None = None, + timeout: float = 8.0, + ) -> list[str] | None: + """Bedrock model listing requires AWS SDK, not a REST call.""" + return None + + +bedrock = BedrockProfile( + name="bedrock", + aliases=("aws", "aws-bedrock", "amazon-bedrock", "amazon"), + api_mode="bedrock_converse", + env_vars=(), # AWS SDK credentials — not env vars + base_url="https://bedrock-runtime.us-east-1.amazonaws.com", + auth_type="aws_sdk", +) + +register_provider(bedrock) diff --git a/providers/copilot.py b/providers/copilot.py new file mode 100644 index 0000000000..d4409c108d --- /dev/null +++ b/providers/copilot.py @@ -0,0 +1,58 @@ +"""Copilot / GitHub Models provider profile. 
+
+Copilot uses per-model api_mode routing:
+  - GPT-5+ / Codex models → codex_responses
+  - Claude models → anthropic_messages
+  - Everything else → chat_completions (this profile covers that subset)
+
+Key quirks for the chat_completions subset:
+  - Editor attribution headers (via copilot_default_headers())
+  - GitHub Models reasoning extra_body (model-catalog gated)
+"""
+
+from typing import Any
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class CopilotProfile(ProviderProfile):
+    """GitHub Copilot / GitHub Models — editor headers + reasoning."""
+
+    def build_api_kwargs_extras(
+        self,
+        *,
+        model: str | None = None,
+        reasoning_config: dict | None = None,
+        supports_reasoning: bool = False,
+        **ctx,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        extra_body: dict[str, Any] = {}
+        if supports_reasoning and model:
+            try:
+                from hermes_cli.models import github_model_reasoning_efforts
+
+                supported_efforts = github_model_reasoning_efforts(model)
+                if supported_efforts and reasoning_config:
+                    effort = reasoning_config.get("effort", "medium")
+                    # Normalize non-standard effort levels to the nearest supported
+                    if effort == "xhigh":
+                        effort = "high"
+                    if effort in supported_efforts:
+                        extra_body["reasoning"] = {"effort": effort}
+                    elif supported_efforts:
+                        extra_body["reasoning"] = {"effort": "medium"}
+            except Exception:
+                pass
+        return extra_body, {}
+
+
+copilot = CopilotProfile(
+    name="copilot",
+    aliases=("github-copilot", "github-models", "github-model", "github"),
+    env_vars=("COPILOT_GITHUB_TOKEN", "GH_TOKEN", "GITHUB_TOKEN"),
+    base_url="https://api.githubcopilot.com",
+    auth_type="copilot",
+)
+
+register_provider(copilot)
diff --git a/providers/copilot_acp.py b/providers/copilot_acp.py
new file mode 100644
index 0000000000..21ec7da2e9
--- /dev/null
+++ b/providers/copilot_acp.py
@@ -0,0 +1,34 @@
+"""GitHub Copilot ACP provider profile.
+
+copilot-acp uses an external ACP subprocess — NOT the standard
+transport. run_agent.py routes this provider to the subprocess directly,
+so the profile keeps api_mode="chat_completions" and only captures
+auth + endpoint metadata for registry migration.
+"""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+
+class CopilotACPProfile(ProviderProfile):
+    """GitHub Copilot ACP — external process, no REST models endpoint."""
+
+    def fetch_models(
+        self,
+        *,
+        api_key: str | None = None,
+        timeout: float = 8.0,
+    ) -> list[str] | None:
+        """Model listing is handled by the ACP subprocess."""
+        return None
+
+
+copilot_acp = CopilotACPProfile(
+    name="copilot-acp",
+    aliases=("github-copilot-acp", "copilot-acp-agent"),
+    api_mode="chat_completions",  # ACP subprocess uses chat_completions routing
+    env_vars=(),  # Managed by ACP subprocess
+    base_url="acp://copilot",  # ACP internal scheme
+    auth_type="external_process",
+)
+
+register_provider(copilot_acp)
diff --git a/providers/custom.py b/providers/custom.py
new file mode 100644
index 0000000000..5707571ceb
--- /dev/null
+++ b/providers/custom.py
@@ -0,0 +1,71 @@
+"""Custom / Ollama (local) provider profile.
+
+Covers any endpoint registered as provider="custom", including local
+Ollama instances. 
Key quirks: + - ollama_num_ctx → extra_body.options.num_ctx (local context window) + - reasoning_config disabled → extra_body.think = False +""" + +from typing import Any + +from providers import register_provider +from providers.base import ProviderProfile + + +class CustomProfile(ProviderProfile): + """Custom/Ollama local provider — think=false and num_ctx support.""" + + def build_api_kwargs_extras( + self, + *, + reasoning_config: dict | None = None, + ollama_num_ctx: int | None = None, + **ctx: Any, + ) -> tuple[dict[str, Any], dict[str, Any]]: + extra_body: dict[str, Any] = {} + + # Ollama context window + if ollama_num_ctx: + options = extra_body.get("options", {}) + options["num_ctx"] = ollama_num_ctx + extra_body["options"] = options + + # Disable thinking when reasoning is turned off + if reasoning_config and isinstance(reasoning_config, dict): + _effort = (reasoning_config.get("effort") or "").strip().lower() + _enabled = reasoning_config.get("enabled", True) + if _effort == "none" or _enabled is False: + extra_body["think"] = False + + return extra_body, {} + + def fetch_models( + self, + *, + api_key: str | None = None, + timeout: float = 8.0, + ) -> list[str] | None: + """Custom/Ollama: base_url is user-configured; fetch if set.""" + if not self.base_url: + return None + return super().fetch_models(api_key=api_key, timeout=timeout) + + +custom = CustomProfile( + name="custom", + aliases=( + "ollama", + "local", + "lmstudio", + "lm-studio", + "lm_studio", + "vllm", + "llamacpp", + "llama.cpp", + "llama-cpp", + ), + env_vars=(), # No fixed key — custom endpoint + base_url="", # User-configured +) + +register_provider(custom) diff --git a/providers/deepseek.py b/providers/deepseek.py new file mode 100644 index 0000000000..59d738f50f --- /dev/null +++ b/providers/deepseek.py @@ -0,0 +1,20 @@ +"""DeepSeek provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +deepseek = ProviderProfile( + name="deepseek", + aliases=("deepseek-chat",), + env_vars=("DEEPSEEK_API_KEY",), + display_name="DeepSeek", + description="DeepSeek — native DeepSeek API", + signup_url="https://platform.deepseek.com/", + fallback_models=( + "deepseek-chat", + "deepseek-reasoner", + ), + base_url="https://api.deepseek.com/v1", +) + +register_provider(deepseek) diff --git a/providers/gemini.py b/providers/gemini.py new file mode 100644 index 0000000000..216057fb9f --- /dev/null +++ b/providers/gemini.py @@ -0,0 +1,34 @@ +"""Google Gemini provider profiles. + +gemini: Google AI Studio (API key) — uses GeminiNativeClient +google-gemini-cli: Google Cloud Code Assist (OAuth) — uses GeminiCloudCodeClient + +Both report api_mode="chat_completions" but use custom native clients +that bypass the standard OpenAI transport. The profile captures auth +and endpoint metadata for auth.py / runtime_provider.py migration. 
+""" + +from providers import register_provider +from providers.base import ProviderProfile + +gemini = ProviderProfile( + name="gemini", + aliases=("google", "google-gemini", "google-ai-studio"), + api_mode="chat_completions", + env_vars=("GOOGLE_API_KEY", "GEMINI_API_KEY"), + base_url="https://generativelanguage.googleapis.com/v1beta", + auth_type="api_key", + default_aux_model="gemini-3-flash-preview", +) + +google_gemini_cli = ProviderProfile( + name="google-gemini-cli", + aliases=("gemini-cli", "gemini-oauth"), + api_mode="chat_completions", + env_vars=(), # OAuth — no API key + base_url="cloudcode-pa://google", # Cloud Code Assist internal scheme + auth_type="oauth_external", +) + +register_provider(gemini) +register_provider(google_gemini_cli) diff --git a/providers/gmi.py b/providers/gmi.py new file mode 100644 index 0000000000..a7cc32e552 --- /dev/null +++ b/providers/gmi.py @@ -0,0 +1,26 @@ +"""GMI Cloud provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +gmi = ProviderProfile( + name="gmi", + aliases=("gmi-cloud", "gmicloud"), + display_name="GMI Cloud", + description="GMI Cloud — multi-model direct API (slash-form model IDs)", + signup_url="https://www.gmicloud.ai/", + env_vars=("GMI_API_KEY", "GMI_BASE_URL"), + base_url="https://api.gmi-serving.com/v1", + auth_type="api_key", + default_aux_model="google/gemini-3.1-flash-lite-preview", + fallback_models=( + "zai-org/GLM-5.1-FP8", + "deepseek-ai/DeepSeek-V3.2", + "moonshotai/Kimi-K2.5", + "google/gemini-3.1-flash-lite-preview", + "anthropic/claude-sonnet-4.6", + "openai/gpt-5.4", + ), +) + +register_provider(gmi) diff --git a/providers/huggingface.py b/providers/huggingface.py new file mode 100644 index 0000000000..039d5a1319 --- /dev/null +++ b/providers/huggingface.py @@ -0,0 +1,20 @@ +"""Hugging Face provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +huggingface = ProviderProfile( + name="huggingface", + aliases=("hf", "hugging-face", "huggingface-hub"), + env_vars=("HF_TOKEN",), + display_name="HuggingFace", + description="HuggingFace Inference API", + signup_url="https://huggingface.co/settings/tokens", + fallback_models=( + "Qwen/Qwen3.5-72B-Instruct", + "deepseek-ai/DeepSeek-V3.2", + ), + base_url="https://router.huggingface.co/v1", +) + +register_provider(huggingface) diff --git a/providers/kilocode.py b/providers/kilocode.py new file mode 100644 index 0000000000..23123966aa --- /dev/null +++ b/providers/kilocode.py @@ -0,0 +1,14 @@ +"""Kilo Code provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +kilocode = ProviderProfile( + name="kilocode", + aliases=("kilo-code", "kilo", "kilo-gateway"), + env_vars=("KILOCODE_API_KEY",), + base_url="https://api.kilo.ai/api/gateway", + default_aux_model="google/gemini-3-flash-preview", +) + +register_provider(kilocode) diff --git a/providers/kimi.py b/providers/kimi.py new file mode 100644 index 0000000000..b5cf53a801 --- /dev/null +++ b/providers/kimi.py @@ -0,0 +1,71 @@ +"""Kimi / Moonshot provider profiles. + +Kimi has dual endpoints: + - sk-kimi-* keys → api.kimi.com/coding (Anthropic Messages API) + - legacy keys → api.moonshot.ai/v1 (OpenAI chat completions) + +This module covers the chat_completions path (/v1 endpoint). 
+""" + +from typing import Any + +from providers import register_provider +from providers.base import OMIT_TEMPERATURE, ProviderProfile + + +class KimiProfile(ProviderProfile): + """Kimi/Moonshot — temperature omitted, thinking + reasoning_effort.""" + + def build_api_kwargs_extras( + self, *, reasoning_config: dict | None = None, **context + ) -> tuple[dict[str, Any], dict[str, Any]]: + """Kimi uses extra_body.thinking + top-level reasoning_effort.""" + extra_body = {} + top_level = {} + + if not reasoning_config or not isinstance(reasoning_config, dict): + # No config → thinking enabled, default effort + extra_body["thinking"] = {"type": "enabled"} + top_level["reasoning_effort"] = "medium" + return extra_body, top_level + + enabled = reasoning_config.get("enabled", True) + if enabled is False: + extra_body["thinking"] = {"type": "disabled"} + return extra_body, top_level + + # Enabled + extra_body["thinking"] = {"type": "enabled"} + effort = (reasoning_config.get("effort") or "").strip().lower() + if effort in ("low", "medium", "high"): + top_level["reasoning_effort"] = effort + else: + top_level["reasoning_effort"] = "medium" + + return extra_body, top_level + + +kimi = KimiProfile( + name="kimi-coding", + aliases=("kimi", "moonshot", "kimi-for-coding"), + env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"), + base_url="https://api.moonshot.ai/v1", + fixed_temperature=OMIT_TEMPERATURE, + default_max_tokens=32000, + default_headers={"User-Agent": "hermes-agent/1.0"}, + default_aux_model="kimi-k2-turbo-preview", +) + +kimi_cn = KimiProfile( + name="kimi-coding-cn", + aliases=("kimi-cn", "moonshot-cn"), + env_vars=("KIMI_CN_API_KEY",), + base_url="https://api.moonshot.cn/v1", + fixed_temperature=OMIT_TEMPERATURE, + default_max_tokens=32000, + default_headers={"User-Agent": "hermes-agent/1.0"}, + default_aux_model="kimi-k2-turbo-preview", +) + +register_provider(kimi) +register_provider(kimi_cn) diff --git a/providers/minimax.py b/providers/minimax.py new file mode 100644 index 0000000000..8fb106a8bf --- /dev/null +++ b/providers/minimax.py @@ -0,0 +1,31 @@ +"""MiniMax provider profiles (international + China). + +Both use anthropic_messages api_mode — their inference_base_url +ends with /anthropic which triggers auto-detection to anthropic_messages. 
+""" + +from providers import register_provider +from providers.base import ProviderProfile + +minimax = ProviderProfile( + name="minimax", + aliases=("mini-max",), + api_mode="anthropic_messages", + env_vars=("MINIMAX_API_KEY",), + base_url="https://api.minimax.io/anthropic", + auth_type="api_key", + default_aux_model="MiniMax-M2.7", +) + +minimax_cn = ProviderProfile( + name="minimax-cn", + aliases=("minimax-china", "minimax_cn"), + api_mode="anthropic_messages", + env_vars=("MINIMAX_CN_API_KEY",), + base_url="https://api.minimaxi.com/anthropic", + auth_type="api_key", + default_aux_model="MiniMax-M2.7", +) + +register_provider(minimax) +register_provider(minimax_cn) diff --git a/providers/nous.py b/providers/nous.py new file mode 100644 index 0000000000..f89e56c23a --- /dev/null +++ b/providers/nous.py @@ -0,0 +1,53 @@ +"""Nous Portal provider profile.""" + +from typing import Any + +from providers import register_provider +from providers.base import ProviderProfile + + +class NousProfile(ProviderProfile): + """Nous Portal — product tags, reasoning with Nous-specific omission.""" + + def build_extra_body( + self, *, session_id: str | None = None, **context + ) -> dict[str, Any]: + return {"tags": ["product=hermes-agent"]} + + def build_api_kwargs_extras( + self, + *, + reasoning_config: dict | None = None, + supports_reasoning: bool = False, + **context, + ) -> tuple[dict[str, Any], dict[str, Any]]: + """Nous: passes full reasoning_config, but OMITS when disabled.""" + extra_body = {} + if supports_reasoning: + if reasoning_config is not None: + rc = dict(reasoning_config) + if rc.get("enabled") is False: + pass # Nous omits reasoning when disabled + else: + extra_body["reasoning"] = rc + else: + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} + return extra_body, {} + + +nous = NousProfile( + name="nous", + aliases=("nous-portal", "nousresearch"), + env_vars=("NOUS_API_KEY",), + display_name="Nous Research", + description="Nous Research — Hermes model family", + signup_url="https://nousresearch.com/", + fallback_models=( + "hermes-3-405b", + "hermes-3-70b", + ), + base_url="https://inference.nousresearch.com/v1", + auth_type="oauth_device_code", +) + +register_provider(nous) diff --git a/providers/nvidia.py b/providers/nvidia.py new file mode 100644 index 0000000000..f6fdc550f6 --- /dev/null +++ b/providers/nvidia.py @@ -0,0 +1,21 @@ +"""NVIDIA NIM provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +nvidia = ProviderProfile( + name="nvidia", + aliases=("nvidia-nim",), + env_vars=("NVIDIA_API_KEY",), + display_name="NVIDIA NIM", + description="NVIDIA NIM — accelerated inference", + signup_url="https://build.nvidia.com/", + fallback_models=( + "nvidia/llama-3.1-nemotron-70b-instruct", + "nvidia/llama-3.3-70b-instruct", + ), + base_url="https://integrate.api.nvidia.com/v1", + default_max_tokens=16384, +) + +register_provider(nvidia) diff --git a/providers/ollama_cloud.py b/providers/ollama_cloud.py new file mode 100644 index 0000000000..f25c442a40 --- /dev/null +++ b/providers/ollama_cloud.py @@ -0,0 +1,14 @@ +"""Ollama Cloud provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +ollama_cloud = ProviderProfile( + name="ollama-cloud", + aliases=("ollama_cloud",), + default_aux_model="nemotron-3-nano:30b", + env_vars=("OLLAMA_API_KEY",), + base_url="https://ollama.com/v1", +) + +register_provider(ollama_cloud) diff --git a/providers/openai_codex.py 
b/providers/openai_codex.py new file mode 100644 index 0000000000..8124b9efe4 --- /dev/null +++ b/providers/openai_codex.py @@ -0,0 +1,15 @@ +"""OpenAI Codex (Responses API) provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +openai_codex = ProviderProfile( + name="openai-codex", + aliases=("codex", "openai_codex"), + api_mode="codex_responses", + env_vars=(), # OAuth external — no API key + base_url="https://chatgpt.com/backend-api/codex", + auth_type="oauth_external", +) + +register_provider(openai_codex) diff --git a/providers/opencode.py b/providers/opencode.py new file mode 100644 index 0000000000..f720e8f5fa --- /dev/null +++ b/providers/opencode.py @@ -0,0 +1,30 @@ +"""OpenCode provider profiles (Zen + Go). + +Both use per-model api_mode routing: + - OpenCode Zen: Claude → anthropic_messages, GPT-5/Codex → codex_responses, + everything else → chat_completions (this profile) + - OpenCode Go: MiniMax → anthropic_messages, GLM/Kimi → chat_completions + (this profile) +""" + +from providers import register_provider +from providers.base import ProviderProfile + +opencode_zen = ProviderProfile( + name="opencode-zen", + aliases=("opencode", "opencode_zen", "zen"), + env_vars=("OPENCODE_ZEN_API_KEY",), + base_url="https://opencode.ai/zen/v1", + default_aux_model="gemini-3-flash", +) + +opencode_go = ProviderProfile( + name="opencode-go", + aliases=("opencode_go", "go", "opencode-go-sub"), + env_vars=("OPENCODE_GO_API_KEY",), + base_url="https://opencode.ai/zen/go/v1", + default_aux_model="glm-5", +) + +register_provider(opencode_zen) +register_provider(opencode_go) diff --git a/providers/openrouter.py b/providers/openrouter.py new file mode 100644 index 0000000000..6aad8fc65d --- /dev/null +++ b/providers/openrouter.py @@ -0,0 +1,86 @@ +"""OpenRouter provider profile.""" + +import logging +from typing import Any + +from providers import register_provider +from providers.base import ProviderProfile + +logger = logging.getLogger(__name__) + +_CACHE: list[str] | None = None + + +class OpenRouterProfile(ProviderProfile): + """OpenRouter aggregator — provider preferences, reasoning config passthrough.""" + + def fetch_models( + self, + *, + api_key: str | None = None, + timeout: float = 8.0, + ) -> list[str] | None: + """Fetch from public OpenRouter catalog — no auth required. + + Note: Tool-call capability filtering is applied by hermes_cli/models.py + via fetch_openrouter_models() → _openrouter_model_supports_tools(), not + here. The picker early-returns via the dedicated openrouter path before + reaching this method, so filtering here would be unreachable. 
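+
+        Caching sketch (module-level _CACHE, implemented below):
+
+            profile.fetch_models()  # first call → network fetch, cached on success
+            profile.fetch_models()  # subsequent calls → cached list, no network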
+ """ + global _CACHE # noqa: PLW0603 + if _CACHE is not None: + return _CACHE + try: + result = super().fetch_models(api_key=None, timeout=timeout) + if result is not None: + _CACHE = result + return result + except Exception as exc: + logger.debug("fetch_models(openrouter): %s", exc) + return None + + def build_extra_body( + self, *, session_id: str | None = None, **context: Any + ) -> dict[str, Any]: + body: dict[str, Any] = {} + prefs = context.get("provider_preferences") + if prefs: + body["provider"] = prefs + return body + + def build_api_kwargs_extras( + self, + *, + reasoning_config: dict | None = None, + supports_reasoning: bool = False, + **context: Any, + ) -> tuple[dict[str, Any], dict[str, Any]]: + """OpenRouter passes the full reasoning_config dict as extra_body.reasoning.""" + extra_body: dict[str, Any] = {} + if supports_reasoning: + if reasoning_config is not None: + extra_body["reasoning"] = dict(reasoning_config) + else: + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} + return extra_body, {} + + +openrouter = OpenRouterProfile( + name="openrouter", + aliases=("or",), + env_vars=("OPENROUTER_API_KEY",), + display_name="OpenRouter", + description="OpenRouter — unified API for 200+ models", + signup_url="https://openrouter.ai/keys", + base_url="https://openrouter.ai/api/v1", + models_url="https://openrouter.ai/api/v1/models", + fallback_models=( + "anthropic/claude-sonnet-4.6", + "openai/gpt-5.4", + "deepseek/deepseek-chat", + "google/gemini-3-flash-preview", + "qwen/qwen3-plus", + ), +) + +register_provider(openrouter) diff --git a/providers/qwen.py b/providers/qwen.py new file mode 100644 index 0000000000..a6ba29f76c --- /dev/null +++ b/providers/qwen.py @@ -0,0 +1,82 @@ +"""Qwen Portal provider profile.""" + +import copy +from typing import Any + +from providers import register_provider +from providers.base import ProviderProfile + + +class QwenProfile(ProviderProfile): + """Qwen Portal — message normalization, vl_high_resolution, metadata top-level.""" + + def prepare_messages(self, messages: list[dict[str, Any]]) -> list[dict[str, Any]]: + """Normalize content to list-of-dicts format. + + Inject cache_control on system message. + + Matches the behavior of run_agent.py:_qwen_prepare_chat_messages(). + """ + prepared = copy.deepcopy(messages) + if not prepared: + return prepared + + for msg in prepared: + if not isinstance(msg, dict): + continue + content = msg.get("content") + if isinstance(content, str): + msg["content"] = [{"type": "text", "text": content}] + elif isinstance(content, list): + normalized_parts = [] + for part in content: + if isinstance(part, str): + normalized_parts.append({"type": "text", "text": part}) + elif isinstance(part, dict): + normalized_parts.append(part) + if normalized_parts: + msg["content"] = normalized_parts + + # Inject cache_control on the last part of the system message. 
+        for msg in prepared:
+            if isinstance(msg, dict) and msg.get("role") == "system":
+                content = msg.get("content")
+                if (
+                    isinstance(content, list)
+                    and content
+                    and isinstance(content[-1], dict)
+                ):
+                    content[-1]["cache_control"] = {"type": "ephemeral"}
+                break
+
+        return prepared
+
+    def build_extra_body(
+        self, *, session_id: str | None = None, **context
+    ) -> dict[str, Any]:
+        return {"vl_high_resolution_images": True}
+
+    def build_api_kwargs_extras(
+        self,
+        *,
+        reasoning_config: dict | None = None,
+        qwen_session_metadata: dict | None = None,
+        **context,
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        """Qwen metadata goes to top-level api_kwargs, not extra_body."""
+        top_level = {}
+        if qwen_session_metadata:
+            top_level["metadata"] = qwen_session_metadata
+        return {}, top_level
+
+
+qwen = QwenProfile(
+    name="qwen-oauth",
+    aliases=("qwen", "qwen-portal", "qwen-cli"),
+    env_vars=("QWEN_API_KEY",),
+    base_url="https://portal.qwen.ai/v1",
+    auth_type="oauth_external",
+    default_max_tokens=65536,
+)
+
+register_provider(qwen)
diff --git a/providers/stepfun.py b/providers/stepfun.py
new file mode 100644
index 0000000000..1ec92cd8be
--- /dev/null
+++ b/providers/stepfun.py
@@ -0,0 +1,14 @@
+"""StepFun provider profile."""
+
+from providers import register_provider
+from providers.base import ProviderProfile
+
+stepfun = ProviderProfile(
+    name="stepfun",
+    aliases=("step", "stepfun-coding-plan"),
+    default_aux_model="step-3.5-flash",
+    env_vars=("STEPFUN_API_KEY",),
+    base_url="https://api.stepfun.ai/step_plan/v1",
+)
+
+register_provider(stepfun)
diff --git a/providers/vercel.py b/providers/vercel.py
new file mode 100644
index 0000000000..9d01ab9824
--- /dev/null
+++ b/providers/vercel.py
@@ -0,0 +1,43 @@
+"""Vercel AI Gateway provider profile.
+
+AI Gateway routes to multiple backends. Hermes sends attribution
+headers and passes the full reasoning config through.
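+
+Header wiring sketch (values from the profile declared below):
+
+    profile = get_provider_profile("vercel")   # alias of ai-gateway
+    profile.default_headers["X-Title"]         # "Hermes Agent"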
+""" + +from typing import Any + +from providers import register_provider +from providers.base import ProviderProfile + + +class VercelAIGatewayProfile(ProviderProfile): + """Vercel AI Gateway — attribution headers + reasoning passthrough.""" + + def build_api_kwargs_extras( + self, + *, + reasoning_config: dict | None = None, + supports_reasoning: bool = True, + **ctx: Any, + ) -> tuple[dict[str, Any], dict[str, Any]]: + extra_body: dict[str, Any] = {} + if supports_reasoning and reasoning_config is not None: + extra_body["reasoning"] = dict(reasoning_config) + elif supports_reasoning: + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} + return extra_body, {} + + +vercel = VercelAIGatewayProfile( + name="ai-gateway", + aliases=("vercel", "vercel-ai-gateway", "ai_gateway", "aigateway"), + env_vars=("AI_GATEWAY_API_KEY",), + base_url="https://ai-gateway.vercel.sh/v1", + default_headers={ + "HTTP-Referer": "https://hermes-agent.nousresearch.com", + "X-Title": "Hermes Agent", + }, + default_aux_model="google/gemini-3-flash", +) + +register_provider(vercel) diff --git a/providers/xai.py b/providers/xai.py new file mode 100644 index 0000000000..8d73ae0199 --- /dev/null +++ b/providers/xai.py @@ -0,0 +1,15 @@ +"""xAI (Grok) provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +xai = ProviderProfile( + name="xai", + aliases=("grok", "x-ai", "x.ai"), + api_mode="codex_responses", + env_vars=("XAI_API_KEY",), + base_url="https://api.x.ai/v1", + auth_type="api_key", +) + +register_provider(xai) diff --git a/providers/xiaomi.py b/providers/xiaomi.py new file mode 100644 index 0000000000..2e0c8db7db --- /dev/null +++ b/providers/xiaomi.py @@ -0,0 +1,13 @@ +"""Xiaomi MiMo provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +xiaomi = ProviderProfile( + name="xiaomi", + aliases=("mimo", "xiaomi-mimo"), + env_vars=("XIAOMI_API_KEY",), + base_url="https://api.xiaomimimo.com/v1", +) + +register_provider(xiaomi) diff --git a/providers/zai.py b/providers/zai.py new file mode 100644 index 0000000000..70aa8704d1 --- /dev/null +++ b/providers/zai.py @@ -0,0 +1,21 @@ +"""ZAI / GLM provider profile.""" + +from providers import register_provider +from providers.base import ProviderProfile + +zai = ProviderProfile( + name="zai", + aliases=("glm", "z-ai", "z.ai", "zhipu"), + env_vars=("GLM_API_KEY", "ZAI_API_KEY", "Z_AI_API_KEY"), + display_name="Z.AI (GLM)", + description="Z.AI / GLM — Zhipu AI models", + signup_url="https://z.ai/", + fallback_models=( + "glm-5", + "glm-4-9b", + ), + base_url="https://api.z.ai/api/paas/v4", + default_aux_model="glm-4.5-flash", +) + +register_provider(zai) diff --git a/pyproject.toml b/pyproject.toml index 4b7e8816ac..e73e543e00 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -137,7 +137,7 @@ py-modules = ["run_agent", "model_tools", "toolsets", "batch_runner", "trajector hermes_cli = ["web_dist/**/*"] [tool.setuptools.packages.find] -include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*"] +include = ["agent", "agent.*", "tools", "tools.*", "hermes_cli", "gateway", "gateway.*", "tui_gateway", "tui_gateway.*", "cron", "acp_adapter", "plugins", "plugins.*", "providers", "providers.*"] [tool.pytest.ini_options] testpaths = ["tests"] diff --git a/run_agent.py b/run_agent.py index 3f2b783082..5e73d5261f 100644 --- a/run_agent.py +++ b/run_agent.py 
@@ -1371,6 +1371,17 @@ class AIAgent: elif base_url_host_matches(effective_base, "chatgpt.com"): from agent.auxiliary_client import _codex_cloudflare_headers client_kwargs["default_headers"] = _codex_cloudflare_headers(api_key) + elif "default_headers" not in client_kwargs: + # Fall back to profile.default_headers for providers that + # declare custom headers (e.g. Vercel AI Gateway attribution, + # Kimi User-Agent on non-kimi.com endpoints). + try: + from providers import get_provider_profile as _gpf + _ph = _gpf(self.provider) + if _ph and _ph.default_headers: + client_kwargs["default_headers"] = dict(_ph.default_headers) + except Exception: + pass else: # No explicit creds — use the centralized provider router from agent.auxiliary_client import resolve_provider_client @@ -5037,7 +5048,7 @@ class AIAgent: _validate_proxy_env_urls() _validate_base_url(client_kwargs.get("base_url")) if self.provider == "copilot-acp" or str(client_kwargs.get("base_url", "")).startswith("acp://copilot"): - from agent.copilot_acp_client import CopilotACPClient + from acp_adapter.copilot_client import CopilotACPClient client = CopilotACPClient(**client_kwargs) logger.info( @@ -5726,7 +5737,19 @@ class AIAgent: self._client_kwargs.get("api_key", "") ) else: - self._client_kwargs.pop("default_headers", None) + # No URL-specific headers — check profile.default_headers before clearing. + _ph_headers = None + try: + from providers import get_provider_profile as _gpf2 + _ph2 = _gpf2(self.provider) + if _ph2 and _ph2.default_headers: + _ph_headers = dict(_ph2.default_headers) + except Exception: + pass + if _ph_headers: + self._client_kwargs["default_headers"] = _ph_headers + else: + self._client_kwargs.pop("default_headers", None) def _swap_credential(self, entry) -> None: runtime_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "") @@ -7857,66 +7880,79 @@ class AIAgent: # ── chat_completions (default) ───────────────────────────────────── _ct = self._get_transport() - # Provider detection flags - _is_qwen = self._is_qwen_portal() - _is_or = self._is_openrouter_url() - _is_gh = ( - base_url_host_matches(self._base_url_lower, "models.github.ai") - or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com") - ) - _is_nous = "nousresearch" in self._base_url_lower - _is_nvidia = "integrate.api.nvidia.com" in self._base_url_lower - _is_kimi = ( - base_url_host_matches(self.base_url, "api.kimi.com") - or base_url_host_matches(self.base_url, "moonshot.ai") - or base_url_host_matches(self.base_url, "moonshot.cn") - ) - - # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE - # sentinel (temperature omitted entirely), a numeric override, or None. + # ── Provider profile path (all chat_completions providers) ───────── + # Profiles handle per-provider quirks via hooks. We compute the shared + # per-call context here and pass it through so hooks can use it. 
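+        # Hook context assembled below and handed to build_kwargs:
+        #   provider_preferences, anthropic_max_output, supports_reasoning,
+        #   qwen_session_metadata.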
try: - from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE - _ft = _fixed_temperature_for_model(self.model, self.base_url) - _omit_temp = _ft is OMIT_TEMPERATURE - _fixed_temp = _ft if not _omit_temp else None + from providers import get_provider_profile + _profile = get_provider_profile(self.provider) except Exception: - _omit_temp = False - _fixed_temp = None + _profile = None - # Provider preferences (OpenRouter-specific) - _prefs: Dict[str, Any] = {} - if self.providers_allowed: - _prefs["only"] = self.providers_allowed - if self.providers_ignored: - _prefs["ignore"] = self.providers_ignored - if self.providers_order: - _prefs["order"] = self.providers_order - if self.provider_sort: - _prefs["sort"] = self.provider_sort - if self.provider_require_parameters: - _prefs["require_parameters"] = True - if self.provider_data_collection: - _prefs["data_collection"] = self.provider_data_collection + if _profile: + _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) + if _ephemeral_out is not None: + self._ephemeral_max_output_tokens = None - # Anthropic max output for Claude on OpenRouter/Nous - _ant_max = None - if (_is_or or _is_nous) and "claude" in (self.model or "").lower(): - try: - from agent.anthropic_adapter import _get_anthropic_max_output - _ant_max = _get_anthropic_max_output(self.model) - except Exception: - pass # fail open — let the proxy pick its default + # Per-call context for profile hooks — mirrors the legacy flag block. + # Computed here so profiles receive live per-call values (not stale). + _prefs: Dict[str, Any] = {} + if self.providers_allowed: + _prefs["only"] = self.providers_allowed + if self.providers_ignored: + _prefs["ignore"] = self.providers_ignored + if self.providers_order: + _prefs["order"] = self.providers_order + if self.provider_sort: + _prefs["sort"] = self.provider_sort + if self.provider_require_parameters: + _prefs["require_parameters"] = True + if self.provider_data_collection: + _prefs["data_collection"] = self.provider_data_collection - # Qwen session metadata precomputed here (promptId is per-call random) - _qwen_meta = None - if _is_qwen: - _qwen_meta = { - "sessionId": self.session_id or "hermes", - "promptId": str(uuid.uuid4()), - } + _is_or = self._is_openrouter_url() + _is_nous = "nousresearch" in self._base_url_lower + _ant_max = None + if (_is_or or _is_nous) and "claude" in (self.model or "").lower(): + try: + from agent.anthropic_adapter import _get_anthropic_max_output + _ant_max = _get_anthropic_max_output(self.model) + except Exception: + pass - # Ephemeral max output override — consume immediately so the next - # turn doesn't inherit it. 
+ _is_qwen = self._is_qwen_portal() + _qwen_meta = None + if _is_qwen: + _qwen_meta = { + "sessionId": self.session_id or "hermes", + "promptId": str(uuid.uuid4()), + } + + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + timeout=self._resolved_api_call_timeout(), + max_tokens=self.max_tokens, + ephemeral_max_output_tokens=_ephemeral_out, + max_tokens_param_fn=self._max_tokens_param, + reasoning_config=self.reasoning_config, + request_overrides=self.request_overrides, + session_id=getattr(self, "session_id", None), + provider_profile=_profile, + ollama_num_ctx=self._ollama_num_ctx, + # Context forwarded to profile hooks: + provider_preferences=_prefs or None, + anthropic_max_output=_ant_max, + supports_reasoning=self._supports_reasoning_extra_body(), + qwen_session_metadata=_qwen_meta, + ) + + # ── Legacy flag path ──────────────────────────────────────────── + # Reached only when get_provider_profile() returns None — i.e. a + # completely unknown provider not in providers/ registry. + # Best-effort: send a clean chat_completions request with no + # provider-specific quirks. _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) if _ephemeral_out is not None: self._ephemeral_max_output_tokens = None @@ -7935,24 +7971,7 @@ class AIAgent: reasoning_config=self.reasoning_config, request_overrides=self.request_overrides, session_id=getattr(self, "session_id", None), - model_lower=(self.model or "").lower(), - is_openrouter=_is_or, - is_nous=_is_nous, - is_qwen_portal=_is_qwen, - is_github_models=_is_gh, - is_nvidia_nim=_is_nvidia, - is_kimi=_is_kimi, - is_custom_provider=self.provider == "custom", ollama_num_ctx=self._ollama_num_ctx, - provider_preferences=_prefs or None, - qwen_prepare_fn=self._qwen_prepare_chat_messages if _is_qwen else None, - qwen_prepare_inplace_fn=self._qwen_prepare_chat_messages_inplace if _is_qwen else None, - qwen_session_metadata=_qwen_meta, - fixed_temperature=_fixed_temp, - omit_temperature=_omit_temp, - supports_reasoning=self._supports_reasoning_extra_body(), - github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None, - anthropic_max_output=_ant_max, ) def _supports_reasoning_extra_body(self) -> bool: diff --git a/tests/agent/test_copilot_acp_client.py b/tests/agent/test_copilot_acp_client.py index 63c87fdabd..584330ca79 100644 --- a/tests/agent/test_copilot_acp_client.py +++ b/tests/agent/test_copilot_acp_client.py @@ -10,7 +10,7 @@ import unittest from pathlib import Path from unittest.mock import patch -from agent.copilot_acp_client import CopilotACPClient +from acp_adapter.copilot_client import CopilotACPClient class _FakeProcess: @@ -100,7 +100,7 @@ class CopilotACPClientSafetyTests(unittest.TestCase): target = home / ".ssh" / "id_rsa" target.parent.mkdir(parents=True, exist_ok=True) - with patch("agent.copilot_acp_client.is_write_denied", return_value=True, create=True): + with patch("acp_adapter.copilot_client.is_write_denied", return_value=True, create=True): response = self._dispatch( { "jsonrpc": "2.0", diff --git a/tests/agent/test_minimax_provider.py b/tests/agent/test_minimax_provider.py index 9ae865d57e..329500917d 100644 --- a/tests/agent/test_minimax_provider.py +++ b/tests/agent/test_minimax_provider.py @@ -71,17 +71,17 @@ class TestMinimaxThinkingSupport: class TestMinimaxAuxModel: - """Verify auxiliary model is standard (not highspeed).""" + """Verify auxiliary model is standard (not highspeed) — now reads from profiles.""" def 
test_minimax_aux_is_standard(self): - from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS - assert _API_KEY_PROVIDER_AUX_MODELS["minimax"] == "MiniMax-M2.7" - assert _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] == "MiniMax-M2.7" + from agent.auxiliary_client import _get_aux_model_for_provider + assert _get_aux_model_for_provider("minimax") == "MiniMax-M2.7" + assert _get_aux_model_for_provider("minimax-cn") == "MiniMax-M2.7" def test_minimax_aux_not_highspeed(self): - from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS - assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax"] - assert "highspeed" not in _API_KEY_PROVIDER_AUX_MODELS["minimax-cn"] + from agent.auxiliary_client import _get_aux_model_for_provider + assert "highspeed" not in _get_aux_model_for_provider("minimax") + assert "highspeed" not in _get_aux_model_for_provider("minimax-cn") class TestMinimaxBetaHeaders: diff --git a/tests/agent/transports/test_chat_completions.py b/tests/agent/transports/test_chat_completions.py index 4adf9f72e5..5a9bf9ca89 100644 --- a/tests/agent/transports/test_chat_completions.py +++ b/tests/agent/transports/test_chat_completions.py @@ -73,17 +73,21 @@ class TestChatCompletionsBuildKwargs: assert kw["tools"] == tools def test_openrouter_provider_prefs(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("openrouter") msgs = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs( model="gpt-4o", messages=msgs, - is_openrouter=True, + provider_profile=profile, provider_preferences={"only": ["openai"]}, ) assert kw["extra_body"]["provider"] == {"only": ["openai"]} def test_nous_tags(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("nous") msgs = [{"role": "user", "content": "Hi"}] - kw = transport.build_kwargs(model="gpt-4o", messages=msgs, is_nous=True) + kw = transport.build_kwargs(model="gpt-4o", messages=msgs, provider_profile=profile) assert kw["extra_body"]["tags"] == ["product=hermes-agent"] def test_reasoning_default(self, transport): @@ -95,29 +99,36 @@ class TestChatCompletionsBuildKwargs: assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"} def test_nous_omits_disabled_reasoning(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("nous") msgs = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs( model="gpt-4o", messages=msgs, + provider_profile=profile, supports_reasoning=True, - is_nous=True, reasoning_config={"enabled": False}, ) # Nous rejects enabled=false; reasoning omitted entirely assert "reasoning" not in kw.get("extra_body", {}) def test_ollama_num_ctx(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("custom") msgs = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs( model="llama3", messages=msgs, + provider_profile=profile, ollama_num_ctx=32768, ) assert kw["extra_body"]["options"]["num_ctx"] == 32768 def test_custom_think_false(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("custom") msgs = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs( model="qwen3", messages=msgs, - is_custom_provider=True, + provider_profile=profile, reasoning_config={"effort": "none"}, ) assert kw["extra_body"]["think"] is False @@ -142,23 +153,29 @@ class TestChatCompletionsBuildKwargs: assert kw["max_tokens"] == 2048 def test_nvidia_default_max_tokens(self, transport): + 
"""NVIDIA max_tokens=16384 is now set via ProviderProfile, not legacy flag.""" + from providers import get_provider_profile + + profile = get_provider_profile("nvidia") msgs = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs( - model="glm-4.7", messages=msgs, - is_nvidia_nim=True, + model="nvidia/llama-3.1-405b-instruct", + messages=msgs, max_tokens_param_fn=lambda n: {"max_tokens": n}, + provider_profile=profile, ) - # NVIDIA default: 16384 assert kw["max_tokens"] == 16384 def test_qwen_default_max_tokens(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("qwen-oauth") msgs = [{"role": "user", "content": "Hi"}] kw = transport.build_kwargs( model="qwen3-coder-plus", messages=msgs, - is_qwen_portal=True, + provider_profile=profile, max_tokens_param_fn=lambda n: {"max_tokens": n}, ) - # Qwen default: 65536 + # Qwen default: 65536 from profile.default_max_tokens assert kw["max_tokens"] == 65536 def test_anthropic_max_output_for_claude_on_aggregator(self, transport): @@ -181,14 +198,23 @@ class TestChatCompletionsBuildKwargs: assert kw["service_tier"] == "priority" def test_fixed_temperature(self, transport): + """Fixed temperature is now set via ProviderProfile.fixed_temperature.""" + from providers.base import ProviderProfile msgs = [{"role": "user", "content": "Hi"}] - kw = transport.build_kwargs(model="gpt-4o", messages=msgs, fixed_temperature=0.6) + kw = transport.build_kwargs( + model="gpt-4o", messages=msgs, + provider_profile=ProviderProfile(name="_t", fixed_temperature=0.6), + ) assert kw["temperature"] == 0.6 def test_omit_temperature(self, transport): + """Omit temperature is set via ProviderProfile with OMIT_TEMPERATURE sentinel.""" + from providers.base import ProviderProfile, OMIT_TEMPERATURE msgs = [{"role": "user", "content": "Hi"}] - kw = transport.build_kwargs(model="gpt-4o", messages=msgs, omit_temperature=True, fixed_temperature=0.5) - # omit wins + kw = transport.build_kwargs( + model="gpt-4o", messages=msgs, + provider_profile=ProviderProfile(name="_t", fixed_temperature=OMIT_TEMPERATURE), + ) assert "temperature" not in kw @@ -196,18 +222,22 @@ class TestChatCompletionsKimi: """Regression tests for the Kimi/Moonshot quirks migrated into the transport.""" def test_kimi_max_tokens_default(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("kimi-coding") kw = transport.build_kwargs( model="kimi-k2", messages=[{"role": "user", "content": "Hi"}], - is_kimi=True, + provider_profile=profile, max_tokens_param_fn=lambda n: {"max_tokens": n}, ) - # Kimi CLI default: 32000 + # Kimi CLI default: 32000 from KimiProfile.default_max_tokens assert kw["max_tokens"] == 32000 def test_kimi_reasoning_effort_top_level(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("kimi-coding") kw = transport.build_kwargs( model="kimi-k2", messages=[{"role": "user", "content": "Hi"}], - is_kimi=True, + provider_profile=profile, reasoning_config={"effort": "high"}, max_tokens_param_fn=lambda n: {"max_tokens": n}, ) @@ -225,17 +255,21 @@ class TestChatCompletionsKimi: assert "reasoning_effort" not in kw def test_kimi_thinking_enabled_extra_body(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("kimi-coding") kw = transport.build_kwargs( model="kimi-k2", messages=[{"role": "user", "content": "Hi"}], - is_kimi=True, + provider_profile=profile, max_tokens_param_fn=lambda n: {"max_tokens": n}, ) assert 
kw["extra_body"]["thinking"] == {"type": "enabled"} def test_kimi_thinking_disabled_extra_body(self, transport): + from providers import get_provider_profile + profile = get_provider_profile("kimi-coding") kw = transport.build_kwargs( model="kimi-k2", messages=[{"role": "user", "content": "Hi"}], - is_kimi=True, + provider_profile=profile, reasoning_config={"enabled": False}, max_tokens_param_fn=lambda n: {"max_tokens": n}, ) diff --git a/tests/hermes_cli/test_gmi_provider.py b/tests/hermes_cli/test_gmi_provider.py index d3b8c1d7aa..0b9363e675 100644 --- a/tests/hermes_cli/test_gmi_provider.py +++ b/tests/hermes_cli/test_gmi_provider.py @@ -269,9 +269,9 @@ class TestGmiModelMetadata: class TestGmiAuxiliary: def test_aux_default_model(self): - from agent.auxiliary_client import _API_KEY_PROVIDER_AUX_MODELS + from agent.auxiliary_client import _get_aux_model_for_provider - assert _API_KEY_PROVIDER_AUX_MODELS["gmi"] == "google/gemini-3.1-flash-lite-preview" + assert _get_aux_model_for_provider("gmi") == "google/gemini-3.1-flash-lite-preview" def test_resolve_provider_client_uses_gmi_aux_default(self, monkeypatch): monkeypatch.setenv("GMI_API_KEY", "gmi-test-key") diff --git a/tests/providers/__init__.py b/tests/providers/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/providers/test_e2e_wiring.py b/tests/providers/test_e2e_wiring.py new file mode 100644 index 0000000000..424dad69bc --- /dev/null +++ b/tests/providers/test_e2e_wiring.py @@ -0,0 +1,118 @@ +"""E2E tests: verify _build_kwargs_from_profile produces correct output. + +These tests call _build_kwargs_from_profile on the transport directly, +without importing run_agent (which would cause xdist worker contamination). +""" + +import pytest +from agent.transports.chat_completions import ChatCompletionsTransport +from providers import get_provider_profile + + +@pytest.fixture +def transport(): + return ChatCompletionsTransport() + + +def _msgs(): + return [{"role": "user", "content": "hi"}] + + +class TestNvidiaProfileWiring: + def test_nvidia_gets_default_max_tokens(self, transport): + profile = get_provider_profile("nvidia") + kwargs = transport.build_kwargs( + model="nvidia/llama-3.1-nemotron-70b-instruct", + messages=_msgs(), + tools=None, + provider_profile=profile, + max_tokens=None, + max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {}, + timeout=300, + reasoning_config=None, + request_overrides=None, + session_id="test", + ollama_num_ctx=None, + ) + # NVIDIA profile sets default_max_tokens=16384 + assert kwargs.get("max_tokens") == 16384 + + def test_nvidia_nim_alias(self, transport): + profile = get_provider_profile("nvidia-nim") + assert profile is not None + assert profile.name == "nvidia" + assert profile.default_max_tokens == 16384 + + def test_nvidia_model_passed(self, transport): + profile = get_provider_profile("nvidia") + kwargs = transport.build_kwargs( + model="nvidia/test-model", + messages=_msgs(), + tools=None, + provider_profile=profile, + max_tokens=None, + max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {}, + timeout=300, + reasoning_config=None, + request_overrides=None, + session_id="test", + ollama_num_ctx=None, + ) + assert kwargs["model"] == "nvidia/test-model" + + def test_nvidia_messages_passed(self, transport): + profile = get_provider_profile("nvidia") + msgs = _msgs() + kwargs = transport.build_kwargs( + model="nvidia/test", + messages=msgs, + tools=None, + provider_profile=profile, + max_tokens=None, + max_tokens_param_fn=lambda x: {"max_tokens": x} if 
+            timeout=300,
+            reasoning_config=None,
+            request_overrides=None,
+            session_id="test",
+            ollama_num_ctx=None,
+        )
+        assert kwargs["messages"] == msgs
+
+
+class TestDeepSeekProfileWiring:
+    def test_deepseek_no_forced_max_tokens(self, transport):
+        profile = get_provider_profile("deepseek")
+        kwargs = transport.build_kwargs(
+            model="deepseek-chat",
+            messages=_msgs(),
+            tools=None,
+            provider_profile=profile,
+            max_tokens=None,
+            max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
+            timeout=300,
+            reasoning_config=None,
+            request_overrides=None,
+            session_id="test",
+            ollama_num_ctx=None,
+        )
+        # DeepSeek has no default_max_tokens
+        assert kwargs["model"] == "deepseek-chat"
+        assert kwargs.get("max_tokens") is None or "max_tokens" not in kwargs
+
+    def test_deepseek_messages_passed(self, transport):
+        profile = get_provider_profile("deepseek")
+        msgs = _msgs()
+        kwargs = transport.build_kwargs(
+            model="deepseek-chat",
+            messages=msgs,
+            tools=None,
+            provider_profile=profile,
+            max_tokens=None,
+            max_tokens_param_fn=lambda x: {"max_tokens": x} if x else {},
+            timeout=300,
+            reasoning_config=None,
+            request_overrides=None,
+            session_id="test",
+            ollama_num_ctx=None,
+        )
+        assert kwargs["messages"] == msgs
diff --git a/tests/providers/test_profile_wiring.py b/tests/providers/test_profile_wiring.py
new file mode 100644
index 0000000000..9096c82b6a
--- /dev/null
+++ b/tests/providers/test_profile_wiring.py
@@ -0,0 +1,290 @@
+"""Profile-path parity tests: verify both profile lookups produce identical output.
+
+Each test calls build_kwargs twice — the `legacy` call resolves the profile by its
+canonical name (keeping any residual legacy kwargs the transport still accepts), the
+`profile` call resolves it via an alias where one exists — and asserts the outputs
+are identical. This catches any behavioral drift between the two paths.
+"""
+
+import pytest
+from agent.transports.chat_completions import ChatCompletionsTransport
+from providers import get_provider_profile
+
+
+@pytest.fixture
+def transport():
+    return ChatCompletionsTransport()
+
+
+def _msgs():
+    return [{"role": "user", "content": "hello"}]
+
+
+def _max_tokens_fn(n):
+    return {"max_completion_tokens": n}
+
+
+class TestNvidiaProfileParity:
+    def test_max_tokens_match(self, transport):
+        """NVIDIA profile sets max_tokens=16384; legacy flag is removed."""
+        profile = transport.build_kwargs(
+            model="nvidia/nemotron", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("nvidia"),
+            max_tokens_param_fn=_max_tokens_fn,
+        )
+        assert profile["max_completion_tokens"] == 16384
+
+
+class TestKimiProfileParity:
+    def test_temperature_omitted(self, transport):
+        legacy = transport.build_kwargs(
+            model="kimi-k2", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("kimi-coding"), omit_temperature=True,
+        )
+        profile = transport.build_kwargs(
+            model="kimi-k2", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("kimi"),
+        )
+        assert "temperature" not in legacy
+        assert "temperature" not in profile
+
+    def test_max_tokens(self, transport):
+        legacy = transport.build_kwargs(
+            model="kimi-k2", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("kimi-coding"), max_tokens_param_fn=_max_tokens_fn,
+        )
+        profile = transport.build_kwargs(
+            model="kimi-k2", messages=_msgs(), tools=None,
+            provider_profile=get_provider_profile("kimi"),
+            max_tokens_param_fn=_max_tokens_fn,
+        )
+        assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 32000
+
+    def test_thinking_enabled(self, transport):
+        rc = {"enabled": True, "effort": "high"}
+        legacy =
transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi-coding"), reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi"), + reasoning_config=rc, + ) + assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"] + assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "high" + + def test_thinking_disabled(self, transport): + rc = {"enabled": False} + legacy = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi-coding"), reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi"), + reasoning_config=rc, + ) + assert profile["extra_body"]["thinking"] == legacy["extra_body"]["thinking"] + assert profile["extra_body"]["thinking"]["type"] == "disabled" + assert "reasoning_effort" not in profile + assert "reasoning_effort" not in legacy + + def test_reasoning_effort_default(self, transport): + rc = {"enabled": True} + legacy = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi-coding"), reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="kimi-k2", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("kimi"), + reasoning_config=rc, + ) + assert profile["reasoning_effort"] == legacy["reasoning_effort"] == "medium" + + +class TestOpenRouterProfileParity: + def test_provider_preferences(self, transport): + prefs = {"allow": ["anthropic"]} + legacy = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), provider_preferences=prefs, + ) + profile = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), + provider_preferences=prefs, + ) + assert profile["extra_body"]["provider"] == legacy["extra_body"]["provider"] + + def test_reasoning_full_config(self, transport): + rc = {"enabled": True, "effort": "high"} + legacy = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), supports_reasoning=True, reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), + supports_reasoning=True, reasoning_config=rc, + ) + assert profile["extra_body"]["reasoning"] == legacy["extra_body"]["reasoning"] + + def test_default_reasoning(self, transport): + legacy = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), supports_reasoning=True, + ) + profile = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), + supports_reasoning=True, + ) + assert profile["extra_body"]["reasoning"] == legacy["extra_body"]["reasoning"] + + +class TestNousProfileParity: + def test_tags(self, transport): + legacy = transport.build_kwargs( + model="hermes-3", messages=_msgs(), tools=None, provider_profile=get_provider_profile("nous"), + ) + profile = transport.build_kwargs( + model="hermes-3", 
messages=_msgs(), tools=None, + provider_profile=get_provider_profile("nous"), + ) + assert profile["extra_body"]["tags"] == legacy["extra_body"]["tags"] + + def test_reasoning_omitted_when_disabled(self, transport): + rc = {"enabled": False} + legacy = transport.build_kwargs( + model="hermes-3", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("nous"), supports_reasoning=True, reasoning_config=rc, + ) + profile = transport.build_kwargs( + model="hermes-3", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("nous"), + supports_reasoning=True, reasoning_config=rc, + ) + assert "reasoning" not in legacy.get("extra_body", {}) + assert "reasoning" not in profile.get("extra_body", {}) + + +class TestQwenProfileParity: + def test_max_tokens(self, transport): + legacy = transport.build_kwargs( + model="qwen3.5", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("qwen-oauth"), max_tokens_param_fn=_max_tokens_fn, + ) + profile = transport.build_kwargs( + model="qwen3.5", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("qwen"), + max_tokens_param_fn=_max_tokens_fn, + ) + assert profile["max_completion_tokens"] == legacy["max_completion_tokens"] == 65536 + + def test_vl_high_resolution(self, transport): + legacy = transport.build_kwargs( + model="qwen3.5", messages=_msgs(), tools=None, provider_profile=get_provider_profile("qwen-oauth"), + ) + profile = transport.build_kwargs( + model="qwen3.5", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("qwen"), + ) + assert profile["extra_body"]["vl_high_resolution_images"] == legacy["extra_body"]["vl_high_resolution_images"] + + def test_metadata_top_level(self, transport): + meta = {"sessionId": "s123", "promptId": "p456"} + legacy = transport.build_kwargs( + model="qwen3.5", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("qwen-oauth"), qwen_session_metadata=meta, + ) + profile = transport.build_kwargs( + model="qwen3.5", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("qwen"), + qwen_session_metadata=meta, + ) + assert profile["metadata"] == legacy["metadata"] == meta + assert "metadata" not in profile.get("extra_body", {}) + + def test_message_preprocessing(self, transport): + """Qwen profile normalizes string content to list-of-parts.""" + msgs = [ + {"role": "system", "content": "You are helpful."}, + {"role": "user", "content": "hello"}, + ] + profile = transport.build_kwargs( + model="qwen3.5", messages=msgs, tools=None, + provider_profile=get_provider_profile("qwen"), + ) + out_msgs = profile["messages"] + # System message content normalized + cache_control injected + assert isinstance(out_msgs[0]["content"], list) + assert out_msgs[0]["content"][0]["type"] == "text" + assert "cache_control" in out_msgs[0]["content"][-1] + # User message content normalized + assert isinstance(out_msgs[1]["content"], list) + assert out_msgs[1]["content"][0] == {"type": "text", "text": "hello"} + + +class TestDeveloperRoleParity: + """Developer role swap must work on BOTH legacy and profile paths.""" + + def test_legacy_path_swaps_for_gpt5(self, transport): + msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}] + kw = transport.build_kwargs( + model="gpt-5.4", messages=msgs, tools=None, + ) + assert kw["messages"][0]["role"] == "developer" + + def test_profile_path_swaps_for_gpt5(self, transport): + msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}] + 
kw = transport.build_kwargs( + model="gpt-5.4", messages=msgs, tools=None, + provider_profile=get_provider_profile("openrouter"), + ) + assert kw["messages"][0]["role"] == "developer" + + def test_profile_path_no_swap_for_claude(self, transport): + msgs = [{"role": "system", "content": "Be helpful"}, {"role": "user", "content": "hi"}] + kw = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", messages=msgs, tools=None, + provider_profile=get_provider_profile("openrouter"), + ) + assert kw["messages"][0]["role"] == "system" + + +class TestRequestOverridesParity: + """request_overrides with extra_body must merge identically on both paths.""" + + def test_extra_body_override_legacy(self, transport): + kw = transport.build_kwargs( + model="gpt-5.4", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), + request_overrides={"extra_body": {"custom_key": "custom_val"}}, + ) + assert kw["extra_body"]["custom_key"] == "custom_val" + + def test_extra_body_override_profile(self, transport): + kw = transport.build_kwargs( + model="gpt-5.4", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), + request_overrides={"extra_body": {"custom_key": "custom_val"}}, + ) + assert kw["extra_body"]["custom_key"] == "custom_val" + + def test_extra_body_override_merges_with_provider_body(self, transport): + """Override extra_body merges WITH provider extra_body, not replaces.""" + kw = transport.build_kwargs( + model="hermes-3", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("nous"), + request_overrides={"extra_body": {"custom": True}}, + ) + assert kw["extra_body"]["tags"] == ["product=hermes-agent"] # from profile + assert kw["extra_body"]["custom"] is True # from override + + def test_top_level_override(self, transport): + kw = transport.build_kwargs( + model="gpt-5.4", messages=_msgs(), tools=None, + provider_profile=get_provider_profile("openrouter"), + request_overrides={"top_p": 0.9}, + ) + assert kw["top_p"] == 0.9 diff --git a/tests/providers/test_provider_profiles.py b/tests/providers/test_provider_profiles.py new file mode 100644 index 0000000000..3e80b0d2f2 --- /dev/null +++ b/tests/providers/test_provider_profiles.py @@ -0,0 +1,203 @@ +"""Tests for the provider module registry and profiles.""" + +import pytest +from providers import get_provider_profile, _REGISTRY +from providers.base import ProviderProfile, OMIT_TEMPERATURE + + +class TestRegistry: + def test_discovery_populates_registry(self): + p = get_provider_profile("nvidia") + assert p is not None + assert p.name == "nvidia" + + def test_alias_lookup(self): + assert get_provider_profile("kimi").name == "kimi-coding" + assert get_provider_profile("moonshot").name == "kimi-coding" + assert get_provider_profile("kimi-coding-cn").name == "kimi-coding-cn" + assert get_provider_profile("or").name == "openrouter" + assert get_provider_profile("nous-portal").name == "nous" + assert get_provider_profile("qwen").name == "qwen-oauth" + assert get_provider_profile("qwen-portal").name == "qwen-oauth" + + def test_unknown_provider_returns_none(self): + assert get_provider_profile("nonexistent-provider") is None + + def test_all_providers_have_name(self): + get_provider_profile("nvidia") # trigger discovery + for name, profile in _REGISTRY.items(): + assert profile.name == name + + +class TestNvidiaProfile: + def test_max_tokens(self): + p = get_provider_profile("nvidia") + assert p.default_max_tokens == 16384 + + def test_no_special_temperature(self): + p = 
get_provider_profile("nvidia") + assert p.fixed_temperature is None + + def test_base_url(self): + p = get_provider_profile("nvidia") + assert "nvidia.com" in p.base_url + + +class TestKimiProfile: + def test_temperature_omit(self): + p = get_provider_profile("kimi") + assert p.fixed_temperature is OMIT_TEMPERATURE + + def test_max_tokens(self): + p = get_provider_profile("kimi") + assert p.default_max_tokens == 32000 + + def test_cn_separate_profile(self): + p = get_provider_profile("kimi-coding-cn") + assert p.name == "kimi-coding-cn" + assert p.env_vars == ("KIMI_CN_API_KEY",) + assert "moonshot.cn" in p.base_url + + def test_cn_not_alias_of_kimi(self): + kimi = get_provider_profile("kimi-coding") + cn = get_provider_profile("kimi-coding-cn") + assert kimi is not cn + assert kimi.base_url != cn.base_url + + def test_thinking_enabled(self): + p = get_provider_profile("kimi") + eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True, "effort": "high"}) + assert eb["thinking"] == {"type": "enabled"} + assert tl["reasoning_effort"] == "high" + + def test_thinking_disabled(self): + p = get_provider_profile("kimi") + eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": False}) + assert eb["thinking"] == {"type": "disabled"} + assert "reasoning_effort" not in tl + + def test_reasoning_effort_default(self): + p = get_provider_profile("kimi") + eb, tl = p.build_api_kwargs_extras(reasoning_config={"enabled": True}) + assert tl["reasoning_effort"] == "medium" + + def test_no_config_defaults(self): + p = get_provider_profile("kimi") + eb, tl = p.build_api_kwargs_extras(reasoning_config=None) + assert eb["thinking"] == {"type": "enabled"} + assert tl["reasoning_effort"] == "medium" + + +class TestOpenRouterProfile: + def test_extra_body_with_prefs(self): + p = get_provider_profile("openrouter") + body = p.build_extra_body(provider_preferences={"allow": ["anthropic"]}) + assert body["provider"] == {"allow": ["anthropic"]} + + def test_extra_body_no_prefs(self): + p = get_provider_profile("openrouter") + body = p.build_extra_body() + assert body == {} + + def test_reasoning_full_config(self): + p = get_provider_profile("openrouter") + eb, _ = p.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "high"}, + supports_reasoning=True, + ) + assert eb["reasoning"] == {"enabled": True, "effort": "high"} + + def test_reasoning_disabled_still_passes(self): + """OpenRouter passes disabled reasoning through (unlike Nous).""" + p = get_provider_profile("openrouter") + eb, _ = p.build_api_kwargs_extras( + reasoning_config={"enabled": False}, + supports_reasoning=True, + ) + assert eb["reasoning"] == {"enabled": False} + + def test_default_reasoning(self): + p = get_provider_profile("openrouter") + eb, _ = p.build_api_kwargs_extras(supports_reasoning=True) + assert eb["reasoning"] == {"enabled": True, "effort": "medium"} + + +class TestNousProfile: + def test_tags(self): + p = get_provider_profile("nous") + body = p.build_extra_body() + assert body["tags"] == ["product=hermes-agent"] + + def test_auth_type(self): + p = get_provider_profile("nous") + assert p.auth_type == "oauth_device_code" + + def test_reasoning_enabled(self): + p = get_provider_profile("nous") + eb, _ = p.build_api_kwargs_extras( + reasoning_config={"enabled": True, "effort": "medium"}, + supports_reasoning=True, + ) + assert eb["reasoning"] == {"enabled": True, "effort": "medium"} + + def test_reasoning_omitted_when_disabled(self): + p = get_provider_profile("nous") + eb, _ = 
p.build_api_kwargs_extras(
+            reasoning_config={"enabled": False},
+            supports_reasoning=True,
+        )
+        assert "reasoning" not in eb
+
+
+class TestQwenProfile:
+    def test_max_tokens(self):
+        p = get_provider_profile("qwen-oauth")
+        assert p.default_max_tokens == 65536
+
+    def test_auth_type(self):
+        p = get_provider_profile("qwen-oauth")
+        assert p.auth_type == "oauth_external"
+
+    def test_extra_body_vl(self):
+        p = get_provider_profile("qwen-oauth")
+        body = p.build_extra_body()
+        assert body["vl_high_resolution_images"] is True
+
+    def test_prepare_messages_normalizes_content(self):
+        p = get_provider_profile("qwen-oauth")
+        msgs = [
+            {"role": "system", "content": "Be helpful"},
+            {"role": "user", "content": "hello"},
+        ]
+        result = p.prepare_messages(msgs)
+        # System message: content normalized to list, cache_control on last part
+        assert isinstance(result[0]["content"], list)
+        assert result[0]["content"][-1].get("cache_control") == {"type": "ephemeral"}
+        assert result[0]["content"][-1]["text"] == "Be helpful"
+        # User message: content normalized to list
+        assert isinstance(result[1]["content"], list)
+        assert result[1]["content"][0]["text"] == "hello"
+
+    def test_metadata_top_level(self):
+        p = get_provider_profile("qwen-oauth")
+        meta = {"sessionId": "s123", "promptId": "p456"}
+        eb, tl = p.build_api_kwargs_extras(qwen_session_metadata=meta)
+        assert tl["metadata"] == meta
+        assert "metadata" not in eb
+
+
+class TestBaseProfile:
+    def test_prepare_messages_passthrough(self):
+        p = ProviderProfile(name="test")
+        msgs = [{"role": "user", "content": "hi"}]
+        assert p.prepare_messages(msgs) is msgs
+
+    def test_build_extra_body_empty(self):
+        p = ProviderProfile(name="test")
+        assert p.build_extra_body() == {}
+
+    def test_build_api_kwargs_extras_empty(self):
+        p = ProviderProfile(name="test")
+        eb, tl = p.build_api_kwargs_extras()
+        assert eb == {}
+        assert tl == {}
diff --git a/tests/providers/test_transport_parity.py b/tests/providers/test_transport_parity.py
new file mode 100644
index 0000000000..be88bc580a
--- /dev/null
+++ b/tests/providers/test_transport_parity.py
@@ -0,0 +1,258 @@
+"""Parity tests: pin the exact current transport behavior per provider.
+
+These tests document the contract between run_agent.py and
+ChatCompletionsTransport.build_kwargs(), now exercised through provider
+profiles instead of the removed legacy flags. Every assertion here must
+keep passing — any failure is a behavioral regression.
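+
+Run this module on its own (standard pytest invocation):
+
+    pytest tests/providers/test_transport_parity.py -q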
+""" + +import pytest +from agent.transports.chat_completions import ChatCompletionsTransport +from providers import get_provider_profile + + +@pytest.fixture +def transport(): + return ChatCompletionsTransport() + + +def _simple_messages(): + return [{"role": "user", "content": "hello"}] + + +def _max_tokens_fn(n): + return {"max_completion_tokens": n} + + +class TestNvidiaParity: + """NVIDIA NIM: default max_tokens=16384.""" + + def test_default_max_tokens(self, transport): + """NVIDIA default max_tokens=16384 comes from profile, not legacy is_nvidia_nim flag.""" + from providers import get_provider_profile + + profile = get_provider_profile("nvidia") + kw = transport.build_kwargs( + model="nvidia/llama-3.1-nemotron-70b-instruct", + messages=_simple_messages(), + tools=None, + max_tokens_param_fn=_max_tokens_fn, + provider_profile=profile, + ) + assert kw["max_completion_tokens"] == 16384 + + def test_user_max_tokens_overrides(self, transport): + from providers import get_provider_profile + + profile = get_provider_profile("nvidia") + kw = transport.build_kwargs( + model="nvidia/llama-3.1-nemotron-70b-instruct", + messages=_simple_messages(), + tools=None, + max_tokens=4096, + max_tokens_param_fn=_max_tokens_fn, + provider_profile=profile, + ) + assert kw["max_completion_tokens"] == 4096 # user overrides default + + +class TestKimiParity: + """Kimi: OMIT temperature, max_tokens=32000, thinking + reasoning_effort.""" + + def test_temperature_omitted(self, transport): + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("kimi-coding"), + omit_temperature=True, + ) + assert "temperature" not in kw + + def test_default_max_tokens(self, transport): + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("kimi-coding"), + max_tokens_param_fn=_max_tokens_fn, + ) + assert kw["max_completion_tokens"] == 32000 + + def test_thinking_enabled(self, transport): + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("kimi-coding"), + reasoning_config={"enabled": True, "effort": "high"}, + ) + assert kw["extra_body"]["thinking"] == {"type": "enabled"} + + def test_thinking_disabled(self, transport): + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("kimi-coding"), + reasoning_config={"enabled": False}, + ) + assert kw["extra_body"]["thinking"] == {"type": "disabled"} + + def test_reasoning_effort_top_level(self, transport): + """Kimi reasoning_effort is a TOP-LEVEL api_kwargs key, NOT in extra_body.""" + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("kimi-coding"), + reasoning_config={"enabled": True, "effort": "high"}, + ) + assert kw.get("reasoning_effort") == "high" + assert "reasoning_effort" not in kw.get("extra_body", {}) + + def test_reasoning_effort_default_medium(self, transport): + kw = transport.build_kwargs( + model="kimi-k2", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("kimi-coding"), + reasoning_config={"enabled": True}, + ) + assert kw.get("reasoning_effort") == "medium" + + +class TestOpenRouterParity: + """OpenRouter: provider preferences, reasoning in extra_body.""" + + def test_provider_preferences(self, transport): + prefs = {"allow": 
["anthropic"], "sort": "price"} + kw = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("openrouter"), + provider_preferences=prefs, + ) + assert kw["extra_body"]["provider"] == prefs + + def test_reasoning_passes_full_config(self, transport): + """OpenRouter passes the FULL reasoning_config dict, not just effort.""" + rc = {"enabled": True, "effort": "high"} + kw = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("openrouter"), + supports_reasoning=True, + reasoning_config=rc, + ) + assert kw["extra_body"]["reasoning"] == rc + + def test_default_reasoning_when_no_config(self, transport): + """When supports_reasoning=True but no config, adds default.""" + kw = transport.build_kwargs( + model="anthropic/claude-sonnet-4.6", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("openrouter"), + supports_reasoning=True, + ) + assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"} + + +class TestNousParity: + """Nous: product tags, reasoning, omit when disabled.""" + + def test_tags(self, transport): + kw = transport.build_kwargs( + model="hermes-3-llama-3.1-405b", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("nous"), + ) + assert kw["extra_body"]["tags"] == ["product=hermes-agent"] + + def test_reasoning_omitted_when_disabled(self, transport): + """Nous special case: reasoning omitted entirely when disabled.""" + kw = transport.build_kwargs( + model="hermes-3-llama-3.1-405b", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("nous"), + supports_reasoning=True, + reasoning_config={"enabled": False}, + ) + assert "reasoning" not in kw.get("extra_body", {}) + + def test_reasoning_enabled(self, transport): + rc = {"enabled": True, "effort": "high"} + kw = transport.build_kwargs( + model="hermes-3-llama-3.1-405b", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("nous"), + supports_reasoning=True, + reasoning_config=rc, + ) + assert kw["extra_body"]["reasoning"] == rc + + +class TestQwenParity: + """Qwen: max_tokens=65536, vl_high_resolution, metadata top-level.""" + + def test_default_max_tokens(self, transport): + kw = transport.build_kwargs( + model="qwen3.5-plus", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("qwen-oauth"), + max_tokens_param_fn=_max_tokens_fn, + ) + assert kw["max_completion_tokens"] == 65536 + + def test_vl_high_resolution(self, transport): + kw = transport.build_kwargs( + model="qwen3.5-plus", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("qwen-oauth"), + ) + assert kw["extra_body"]["vl_high_resolution_images"] is True + + def test_metadata_top_level(self, transport): + """Qwen metadata goes to top-level api_kwargs, NOT extra_body.""" + meta = {"sessionId": "s123", "promptId": "p456"} + kw = transport.build_kwargs( + model="qwen3.5-plus", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("qwen-oauth"), + qwen_session_metadata=meta, + ) + assert kw["metadata"] == meta + assert "metadata" not in kw.get("extra_body", {}) + + +class TestCustomOllamaParity: + """Custom/Ollama: num_ctx, think=false — now tested via profile.""" + + def test_ollama_num_ctx(self, transport): + kw = transport.build_kwargs( + 
model="llama3.1", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("custom"), + ollama_num_ctx=131072, + ) + assert kw["extra_body"]["options"]["num_ctx"] == 131072 + + def test_think_false_when_disabled(self, transport): + kw = transport.build_kwargs( + model="qwen3:72b", + messages=_simple_messages(), + tools=None, + provider_profile=get_provider_profile("custom"), + reasoning_config={"enabled": False, "effort": "none"}, + ) + assert kw["extra_body"]["think"] is False diff --git a/tests/run_agent/test_run_agent.py b/tests/run_agent/test_run_agent.py index eb2b47f87a..c3f91f07f7 100644 --- a/tests/run_agent/test_run_agent.py +++ b/tests/run_agent/test_run_agent.py @@ -1097,6 +1097,7 @@ class TestBuildApiKwargs: assert "temperature" not in kwargs def test_kimi_coding_endpoint_omits_temperature(self, agent): + agent.provider = "kimi-coding" agent.base_url = "https://api.kimi.com/coding/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-k2.5" @@ -1109,6 +1110,7 @@ class TestBuildApiKwargs: def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent): """Kimi endpoint should send max_tokens=32000 and reasoning_effort as top-level params, matching Kimi CLI's default behavior.""" + agent.provider = "kimi-coding" agent.base_url = "https://api.kimi.com/coding/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-for-coding" @@ -1121,6 +1123,7 @@ class TestBuildApiKwargs: def test_kimi_coding_endpoint_respects_custom_effort(self, agent): """reasoning_effort should reflect reasoning_config.effort when set.""" + agent.provider = "kimi-coding" agent.base_url = "https://api.kimi.com/coding/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-for-coding" @@ -1134,6 +1137,7 @@ class TestBuildApiKwargs: def test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent): """Kimi endpoint should send extra_body.thinking={"type":"enabled"} to activate reasoning mode, mirroring Kimi CLI's with_thinking().""" + agent.provider = "kimi-coding" agent.base_url = "https://api.kimi.com/coding/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-for-coding" @@ -1147,6 +1151,7 @@ class TestBuildApiKwargs: """When reasoning_config.enabled=False, thinking should be disabled and reasoning_effort should be omitted entirely — mirroring Kimi CLI's with_thinking("off") which maps to reasoning_effort=None.""" + agent.provider = "kimi-coding" agent.base_url = "https://api.kimi.com/coding/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-for-coding" @@ -1160,6 +1165,7 @@ class TestBuildApiKwargs: def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent): """api.moonshot.ai should get the same Kimi-compatible params.""" + agent.provider = "kimi-coding" agent.base_url = "https://api.moonshot.ai/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-k2.5" @@ -1173,6 +1179,7 @@ class TestBuildApiKwargs: def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent): """api.moonshot.cn (China endpoint) should get the same params.""" + agent.provider = "kimi-coding-cn" agent.base_url = "https://api.moonshot.cn/v1" agent._base_url_lower = agent.base_url.lower() agent.model = "kimi-k2.5" @@ -1185,6 +1192,7 @@ class TestBuildApiKwargs: assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} def test_provider_preferences_injected(self, agent): + agent.provider = "openrouter" agent.base_url = "https://openrouter.ai/api/v1" agent.providers_allowed = 
["Anthropic"] messages = [{"role": "user", "content": "hi"}] @@ -1193,6 +1201,7 @@ class TestBuildApiKwargs: def test_reasoning_config_default_openrouter(self, agent): """Default reasoning config for OpenRouter should be medium.""" + agent.provider = "openrouter" agent.base_url = "https://openrouter.ai/api/v1" agent.model = "anthropic/claude-sonnet-4-20250514" messages = [{"role": "user", "content": "hi"}] @@ -1202,6 +1211,7 @@ class TestBuildApiKwargs: assert reasoning["effort"] == "medium" def test_reasoning_config_custom(self, agent): + agent.provider = "openrouter" agent.base_url = "https://openrouter.ai/api/v1" agent.model = "anthropic/claude-sonnet-4-20250514" agent.reasoning_config = {"enabled": False} @@ -1217,6 +1227,7 @@ class TestBuildApiKwargs: assert "reasoning" not in kwargs.get("extra_body", {}) def test_reasoning_sent_for_supported_openrouter_model(self, agent): + agent.provider = "openrouter" agent.base_url = "https://openrouter.ai/api/v1" agent.model = "qwen/qwen3.5-plus-02-15" messages = [{"role": "user", "content": "hi"}] @@ -1224,6 +1235,7 @@ class TestBuildApiKwargs: assert kwargs["extra_body"]["reasoning"]["effort"] == "medium" def test_reasoning_sent_for_nous_route(self, agent): + agent.provider = "nous" agent.base_url = "https://inference-api.nousresearch.com/v1" agent.model = "minimax/minimax-m2.5" messages = [{"role": "user", "content": "hi"}] @@ -1231,18 +1243,38 @@ class TestBuildApiKwargs: assert kwargs["extra_body"]["reasoning"]["effort"] == "medium" def test_reasoning_sent_for_copilot_gpt5(self, agent): - agent.base_url = "https://api.githubcopilot.com" - agent.model = "gpt-5.4" - messages = [{"role": "user", "content": "hi"}] - kwargs = agent._build_api_kwargs(messages) + """Copilot/GitHub Models: GPT-5 reasoning goes in extra_body.reasoning.""" + from agent.transports import get_transport + from providers import get_provider_profile + + transport = get_transport("chat_completions") + profile = get_provider_profile("copilot") + msgs = [{"role": "user", "content": "hi"}] + kwargs = transport.build_kwargs( + model="gpt-5.4", + messages=msgs, + tools=None, + supports_reasoning=True, + provider_profile=profile, + ) assert kwargs["extra_body"]["reasoning"] == {"effort": "medium"} def test_reasoning_xhigh_normalized_for_copilot(self, agent): - agent.base_url = "https://api.githubcopilot.com" - agent.model = "gpt-5.4" - agent.reasoning_config = {"enabled": True, "effort": "xhigh"} - messages = [{"role": "user", "content": "hi"}] - kwargs = agent._build_api_kwargs(messages) + """xhigh effort should normalize to high for Copilot GitHub Models.""" + from agent.transports import get_transport + from providers import get_provider_profile + + transport = get_transport("chat_completions") + profile = get_provider_profile("copilot") + msgs = [{"role": "user", "content": "hi"}] + kwargs = transport.build_kwargs( + model="gpt-5.4", + messages=msgs, + tools=None, + supports_reasoning=True, + reasoning_config={"enabled": True, "effort": "xhigh"}, + provider_profile=profile, + ) assert kwargs["extra_body"]["reasoning"] == {"effort": "high"} def test_reasoning_omitted_for_non_reasoning_copilot_model(self, agent): @@ -1260,6 +1292,7 @@ class TestBuildApiKwargs: def test_qwen_portal_formats_messages_and_metadata(self, agent): + agent.provider = "qwen-oauth" agent.base_url = "https://portal.qwen.ai/v1" agent._base_url_lower = agent.base_url.lower() agent.session_id = "sess-123" @@ -1276,6 +1309,7 @@ class TestBuildApiKwargs: assert kwargs["messages"][2]["content"][0]["text"] == 
"hi" def test_qwen_portal_normalizes_bare_string_content_parts(self, agent): + agent.provider = "qwen-oauth" agent.base_url = "https://portal.qwen.ai/v1" agent._base_url_lower = agent.base_url.lower() messages = [ @@ -1288,6 +1322,7 @@ class TestBuildApiKwargs: assert user_content[1] == {"type": "text", "text": "world"} def test_qwen_portal_no_system_message(self, agent): + agent.provider = "qwen-oauth" agent.base_url = "https://portal.qwen.ai/v1" agent._base_url_lower = agent.base_url.lower() messages = [{"role": "user", "content": "hi"}] @@ -1308,6 +1343,7 @@ class TestBuildApiKwargs: def test_qwen_portal_default_max_tokens(self, agent): """When max_tokens is None, Qwen Portal gets a default of 65536 to prevent reasoning models from exhausting their output budget.""" + agent.provider = "qwen-oauth" agent.base_url = "https://portal.qwen.ai/v1" agent._base_url_lower = agent.base_url.lower() agent.max_tokens = None @@ -3843,7 +3879,7 @@ def test_aiagent_uses_copilot_acp_client(): patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), patch("run_agent.check_toolset_requirements", return_value={}), patch("run_agent.OpenAI") as mock_openai, - patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp_client, + patch("acp_adapter.copilot_client.CopilotACPClient") as mock_acp_client, ): acp_client = MagicMock() mock_acp_client.return_value = acp_client diff --git a/uv.lock b/uv.lock index dfb2f786b0..8ffbd3050e 100644 --- a/uv.lock +++ b/uv.lock @@ -9,7 +9,7 @@ resolution-markers = [ ] [options] -exclude-newer = "2026-04-17T16:49:45.944715922Z" +exclude-newer = "2026-04-19T17:00:07.266826Z" exclude-newer-span = "P7D" [[package]] @@ -564,30 +564,30 @@ wheels = [ [[package]] name = "boto3" -version = "1.42.89" +version = "1.42.91" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "botocore" }, { name = "jmespath" }, { name = "s3transfer" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/bb/0c/f7bccb22b245cabf392816baba20f9e95f78ace7dbc580fd40136e80e732/boto3-1.42.89.tar.gz", hash = "sha256:3e43aacc0801bba9bcd23a8c271c089af297a69565f783fcdd357ae0e330bf1e", size = 113165, upload-time = "2026-04-13T19:36:17.516Z" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/c0/98b8cec7ca22dde776df48c58940ae1abc425593959b7226e270760d726f/boto3-1.42.91.tar.gz", hash = "sha256:03d70532b17f7f84df37ca7e8c21553280454dea53ae12b15d1cfef9b16fcb8a", size = 113181, upload-time = "2026-04-17T19:31:06.251Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b9/33/55103ba5ef9975ea54b8d39e69b76eb6e9fded3beae5f01065e26951a3a1/boto3-1.42.89-py3-none-any.whl", hash = "sha256:6204b189f4d0c655535f43d7eaa57ff4e8d965b8463c97e45952291211162932", size = 140556, upload-time = "2026-04-13T19:36:13.894Z" }, + { url = "https://files.pythonhosted.org/packages/02/29/faba6521257c34085cc9b439ef98235b581772580f417fa3629728007270/boto3-1.42.91-py3-none-any.whl", hash = "sha256:04e72071cde022951ce7f81bd9933c90095ab8923e8ced61c8dacfe9edac0f5c", size = 140553, upload-time = "2026-04-17T19:31:02.57Z" }, ] [[package]] name = "botocore" -version = "1.42.89" +version = "1.42.91" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "jmespath" }, { name = "python-dateutil" }, { name = "urllib3" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/0f/cc/e6be943efa9051bd15c2ee14077c2b10d6e27c9e9385fc43a03a5c4ed8b5/botocore-1.42.89.tar.gz", hash = "sha256:95ac52f472dad29942f3088b278ab493044516c16dbf9133c975af16527baa99", size = 
15206290, upload-time = "2026-04-13T19:36:02.321Z" } +sdist = { url = "https://files.pythonhosted.org/packages/21/bc/a4b7c46471c2e789ad8c4c7acfd7f302fdb481d93ff870f441249b924ae6/botocore-1.42.91.tar.gz", hash = "sha256:d252e27bc454afdbf5ed3dc617aa423f2c855c081e98b7963093399483ecc698", size = 15213010, upload-time = "2026-04-17T19:30:50.793Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/91/f1/90a7b8eda38b7c3a65ca7ee0075bdf310b6b471cb1b95fab6e8994323a50/botocore-1.42.89-py3-none-any.whl", hash = "sha256:d9b786c8d9db6473063b4cc5be0ba7e6a381082307bd6afb69d4216f9fa95f35", size = 14887287, upload-time = "2026-04-13T19:35:56.677Z" }, + { url = "https://files.pythonhosted.org/packages/b1/fc/24cc0a47c824f13933e210e9ad034b4fba22f7185b8d904c0fbf5a3b2be8/botocore-1.42.91-py3-none-any.whl", hash = "sha256:7a28c3cc6bfab5724ad18899d52402b776a0de7d87fa20c3c5270bcaaf199ce8", size = 14897344, upload-time = "2026-04-17T19:30:44.245Z" }, ] [[package]] @@ -1759,6 +1759,77 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/6a/09/e21df6aef1e1ffc0c816f0522ddc3f6dcded766c3261813131c78a704470/gitpython-3.1.46-py3-none-any.whl", hash = "sha256:79812ed143d9d25b6d176a10bb511de0f9c67b1fa641d82097b0ab90398a2058", size = 208620, upload-time = "2026-01-01T15:37:30.574Z" }, ] +[[package]] +name = "google-api-core" +version = "2.30.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "googleapis-common-protos" }, + { name = "proto-plus" }, + { name = "protobuf" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/16/ce/502a57fb0ec752026d24df1280b162294b22a0afb98a326084f9a979138b/google_api_core-2.30.3.tar.gz", hash = "sha256:e601a37f148585319b26db36e219df68c5d07b6382cff2d580e83404e44d641b", size = 177001, upload-time = "2026-04-10T00:41:28.035Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/03/15/e56f351cf6ef1cfea58e6ac226a7318ed1deb2218c4b3cc9bd9e4b786c5a/google_api_core-2.30.3-py3-none-any.whl", hash = "sha256:a85761ba72c444dad5d611c2220633480b2b6be2521eca69cca2dbb3ffd6bfe8", size = 173274, upload-time = "2026-04-09T22:57:16.198Z" }, +] + +[[package]] +name = "google-api-python-client" +version = "2.194.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-api-core" }, + { name = "google-auth" }, + { name = "google-auth-httplib2" }, + { name = "httplib2" }, + { name = "uritemplate" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/60/ab/e83af0eb043e4ccc49571ca7a6a49984e9d00f4e9e6e6f1238d60bc84dce/google_api_python_client-2.194.0.tar.gz", hash = "sha256:db92647bd1a90f40b79c9618461553c2b20b6a43ce7395fa6de07132dc14f023", size = 14443469, upload-time = "2026-04-08T23:07:35.757Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b0/34/5a624e49f179aa5b0cb87b2ce8093960299030ff40423bfbde09360eb908/google_api_python_client-2.194.0-py3-none-any.whl", hash = "sha256:61eaaac3b8fc8fdf11c08af87abc3d1342d1b37319cc1b57405f86ef7697e717", size = 15016514, upload-time = "2026-04-08T23:07:33.093Z" }, +] + +[[package]] +name = "google-auth" +version = "2.49.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "cryptography" }, + { name = "pyasn1-modules" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c6/fc/e925290a1ad95c975c459e2df070fac2b90954e13a0370ac505dff78cb99/google_auth-2.49.2.tar.gz", hash = "sha256:c1ae38500e73065dcae57355adb6278cf8b5c8e391994ae9cbadbcb9631ab409", size = 333958, 
upload-time = "2026-04-10T00:41:21.888Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/73/76/d241a5c927433420507215df6cac1b1fa4ac0ba7a794df42a84326c68da8/google_auth-2.49.2-py3-none-any.whl", hash = "sha256:c2720924dfc82dedb962c9f52cabb2ab16714fd0a6a707e40561d217574ed6d5", size = 240638, upload-time = "2026-04-10T00:41:14.501Z" }, +] + +[[package]] +name = "google-auth-httplib2" +version = "0.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "httplib2" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/ed/99/107612bef8d24b298bb5a7c8466f908ecda791d43f9466f5c3978f5b24c1/google_auth_httplib2-0.3.1.tar.gz", hash = "sha256:0af542e815784cb64159b4469aa5d71dd41069ba93effa006e1916b1dcd88e55", size = 11152, upload-time = "2026-03-30T22:50:26.766Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/97/e9/93afb14d23a949acaa3f4e7cc51a0024671174e116e35f42850764b99634/google_auth_httplib2-0.3.1-py3-none-any.whl", hash = "sha256:682356a90ef4ba3d06548c37e9112eea6fc00395a11b0303a644c1a86abc275c", size = 9534, upload-time = "2026-03-30T22:49:03.384Z" }, +] + +[[package]] +name = "google-auth-oauthlib" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "google-auth" }, + { name = "requests-oauthlib" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a6/82/62482931dcbe5266a2680d0da17096f2aab983ecb320277d9556700ce00e/google_auth_oauthlib-1.3.1.tar.gz", hash = "sha256:14c22c7b3dd3d06dbe44264144409039465effdd1eef94f7ce3710e486cc4bfa", size = 21663, upload-time = "2026-03-30T22:49:56.408Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/e0/cb454a95f460903e39f101e950038ec24a072ca69d0a294a6df625cc1627/google_auth_oauthlib-1.3.1-py3-none-any.whl", hash = "sha256:1a139ef23f1318756805b0e95f655c238bffd29655329a2978218248da4ee7f8", size = 19247, upload-time = "2026-03-30T20:02:23.894Z" }, +] + [[package]] name = "googleapis-common-protos" version = "1.73.0" @@ -1912,6 +1983,9 @@ all = [ { name = "elevenlabs" }, { name = "fastapi" }, { name = "faster-whisper" }, + { name = "google-api-python-client" }, + { name = "google-auth-httplib2" }, + { name = "google-auth-oauthlib" }, { name = "honcho-ai" }, { name = "lark-oapi" }, { name = "markdown", marker = "sys_platform == 'linux'" }, @@ -1965,6 +2039,11 @@ feishu = [ { name = "lark-oapi" }, { name = "qrcode" }, ] +google = [ + { name = "google-api-python-client" }, + { name = "google-auth-httplib2" }, + { name = "google-auth-oauthlib" }, +] homeassistant = [ { name = "aiohttp" }, ] @@ -2064,6 +2143,9 @@ requires-dist = [ { name = "faster-whisper", marker = "extra == 'voice'", specifier = ">=1.0.0,<2" }, { name = "fire", specifier = ">=0.7.1,<1" }, { name = "firecrawl-py", specifier = ">=4.16.0,<5" }, + { name = "google-api-python-client", marker = "extra == 'google'", specifier = ">=2.100,<3" }, + { name = "google-auth-httplib2", marker = "extra == 'google'", specifier = ">=0.2,<1" }, + { name = "google-auth-oauthlib", marker = "extra == 'google'", specifier = ">=1.0,<2" }, { name = "hermes-agent", extras = ["acp"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["acp"], marker = "extra == 'termux'" }, { name = "hermes-agent", extras = ["bedrock"], marker = "extra == 'all'" }, @@ -2075,6 +2157,7 @@ requires-dist = [ { name = "hermes-agent", extras = ["dev"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["dingtalk"], marker = "extra == 'all'" }, { name = 
"hermes-agent", extras = ["feishu"], marker = "extra == 'all'" }, + { name = "hermes-agent", extras = ["google"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["homeassistant"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["honcho"], marker = "extra == 'all'" }, { name = "hermes-agent", extras = ["honcho"], marker = "extra == 'termux'" }, @@ -2136,7 +2219,7 @@ requires-dist = [ { name = "wandb", marker = "extra == 'rl'", specifier = ">=0.15.0,<1" }, { name = "yc-bench", marker = "python_full_version >= '3.12' and extra == 'yc-bench'", git = "https://github.com/collinear-ai/yc-bench.git?rev=bfb0c88062450f46341bd9a5298903fc2e952a5c" }, ] -provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "bedrock", "termux", "dingtalk", "feishu", "web", "rl", "yc-bench", "all"] +provides-extras = ["modal", "daytona", "dev", "messaging", "cron", "slack", "matrix", "cli", "tts-premium", "voice", "pty", "honcho", "mcp", "homeassistant", "sms", "acp", "mistral", "bedrock", "termux", "dingtalk", "feishu", "google", "web", "rl", "yc-bench", "all"] [[package]] name = "hf-transfer" @@ -2238,6 +2321,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, ] +[[package]] +name = "httplib2" +version = "0.31.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyparsing" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/c1/1f/e86365613582c027dda5ddb64e1010e57a3d53e99ab8a72093fa13d565ec/httplib2-0.31.2.tar.gz", hash = "sha256:385e0869d7397484f4eab426197a4c020b606edd43372492337c0b4010ae5d24", size = 250800, upload-time = "2026-01-23T11:04:44.165Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2f/90/fd509079dfcab01102c0fdd87f3a9506894bc70afcf9e9785ef6b2b3aff6/httplib2-0.31.2-py3-none-any.whl", hash = "sha256:dbf0c2fa3862acf3c55c078ea9c0bc4481d7dc5117cae71be9514912cf9f8349", size = 91099, upload-time = "2026-01-23T11:04:42.78Z" }, +] + [[package]] name = "httptools" version = "0.7.1" @@ -3277,6 +3372,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/57/a7/b35835e278c18b85206834b3aa3abe68e77a98769c59233d1f6300284781/numpy-2.4.3-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:4b42639cdde6d24e732ff823a3fa5b701d8acad89c4142bc1d0bd6dc85200ba5", size = 12504685, upload-time = "2026-03-09T07:58:50.525Z" }, ] +[[package]] +name = "oauthlib" +version = "3.3.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0b/5f/19930f824ffeb0ad4372da4812c50edbd1434f678c90c2733e1188edfc63/oauthlib-3.3.1.tar.gz", hash = "sha256:0f0f8aa759826a193cf66c12ea1af1637f87b9b4622d46e866952bb022e538c9", size = 185918, upload-time = "2025-06-19T22:48:08.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/be/9c/92789c596b8df838baa98fa71844d84283302f7604ed565dafe5a6b5041a/oauthlib-3.3.1-py3-none-any.whl", hash = "sha256:88119c938d2b8fb88561af5f6ee0eec8cc8d552b7bb1f712743136eb7523b7a1", size = 160065, upload-time = "2025-06-19T22:48:06.508Z" }, +] + [[package]] name = "obstore" version = "0.8.2" @@ -3855,6 +3959,18 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/5b/5a/bc7b4a4ef808fa59a816c17b20c4bef6884daebbdf627ff2a161da67da19/propcache-0.4.1-py3-none-any.whl", hash = "sha256:af2a6052aeb6cf17d3e46ee169099044fd8224cbaf75c76a2ef596e8163e2237", size = 13305, upload-time = "2025-10-08T19:49:00.792Z" }, ] +[[package]] +name = "proto-plus" +version = "1.27.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "protobuf" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/81/0d/94dfe80193e79d55258345901acd2917523d56e8381bc4dee7fd38e3868a/proto_plus-1.27.2.tar.gz", hash = "sha256:b2adde53adadf75737c44d3dcb0104fde65250dfc83ad59168b4aa3e574b6a24", size = 57204, upload-time = "2026-03-26T22:18:57.174Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/f3/1fba73eeffafc998a25d59703b63f8be4fe8a5cb12eaff7386a0ba0f7125/proto_plus-1.27.2-py3-none-any.whl", hash = "sha256:6432f75893d3b9e70b9c412f1d2f03f65b11fb164b793d14ae2ca01821d22718", size = 50450, upload-time = "2026-03-26T22:13:42.927Z" }, +] + [[package]] name = "protobuf" version = "6.33.5" @@ -3929,6 +4045,27 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/f2/c0e76a0b451ffdf0cf788932e182758eb7558953f4f27f1aff8e2518b653/pyarrow-23.0.1-cp314-cp314t-win_amd64.whl", hash = "sha256:527e8d899f14bd15b740cd5a54ad56b7f98044955373a17179d5956ddb93d9ce", size = 28365807, upload-time = "2026-02-16T10:14:03.892Z" }, ] +[[package]] +name = "pyasn1" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/5f/6583902b6f79b399c9c40674ac384fd9cd77805f9e6205075f828ef11fb2/pyasn1-0.6.3.tar.gz", hash = "sha256:697a8ecd6d98891189184ca1fa05d1bb00e2f84b5977c481452050549c8a72cf", size = 148685, upload-time = "2026-03-17T01:06:53.382Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5d/a0/7d793dce3fa811fe047d6ae2431c672364b462850c6235ae306c0efd025f/pyasn1-0.6.3-py3-none-any.whl", hash = "sha256:a80184d120f0864a52a073acc6fc642847d0be408e7c7252f31390c0f4eadcde", size = 83997, upload-time = "2026-03-17T01:06:52.036Z" }, +] + +[[package]] +name = "pyasn1-modules" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pyasn1" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/e9/e6/78ebbb10a8c8e4b61a59249394a4a594c1a7af95593dc933a349c8d00964/pyasn1_modules-0.4.2.tar.gz", hash = "sha256:677091de870a80aae844b1ca6134f54652fa2c8c5a52aa396440ac3106e941e6", size = 307892, upload-time = "2025-03-28T02:41:22.17Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/47/8d/d529b5d697919ba8c11ad626e835d4039be708a35b0d22de83a269a6682c/pyasn1_modules-0.4.2-py3-none-any.whl", hash = "sha256:29253a9207ce32b64c3ac6600edc75368f98473906e8fd1043bd6b5b1de2c14a", size = 181259, upload-time = "2025-03-28T02:41:19.028Z" }, +] + [[package]] name = "pycparser" version = "3.0" @@ -4529,6 +4666,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/56/5d/c814546c2333ceea4ba42262d8c4d55763003e767fa169adc693bd524478/requests-2.33.0-py3-none-any.whl", hash = "sha256:3324635456fa185245e24865e810cecec7b4caf933d7eb133dcde67d48cee69b", size = 65017, upload-time = "2026-03-25T15:10:40.382Z" }, ] +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oauthlib" }, + { name = "requests" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650, upload-time = "2024-03-22T20:32:29.939Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179, upload-time = "2024-03-22T20:32:28.055Z" }, +] + [[package]] name = "requests-toolbelt" version = "1.0.0" @@ -4664,27 +4814,27 @@ wheels = [ [[package]] name = "ruff" -version = "0.15.10" +version = "0.15.11" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/e7/d9/aa3f7d59a10ef6b14fe3431706f854dbf03c5976be614a9796d36326810c/ruff-0.15.10.tar.gz", hash = "sha256:d1f86e67ebfdef88e00faefa1552b5e510e1d35f3be7d423dc7e84e63788c94e", size = 4631728, upload-time = "2026-04-09T14:06:09.884Z" } +sdist = { url = "https://files.pythonhosted.org/packages/e4/8d/192f3d7103816158dfd5ea50d098ef2aec19194e6cbccd4b3485bdb2eb2d/ruff-0.15.11.tar.gz", hash = "sha256:f092b21708bf0e7437ce9ada249dfe688ff9a0954fc94abab05dcea7dcd29c33", size = 4637264, upload-time = "2026-04-16T18:46:26.58Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/00/a1c2fdc9939b2c03691edbda290afcd297f1f389196172826b03d6b6a595/ruff-0.15.10-py3-none-linux_armv6l.whl", hash = "sha256:0744e31482f8f7d0d10a11fcbf897af272fefdfcb10f5af907b18c2813ff4d5f", size = 10563362, upload-time = "2026-04-09T14:06:21.189Z" }, - { url = "https://files.pythonhosted.org/packages/5c/15/006990029aea0bebe9d33c73c3e28c80c391ebdba408d1b08496f00d422d/ruff-0.15.10-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b1e7c16ea0ff5a53b7c2df52d947e685973049be1cdfe2b59a9c43601897b22e", size = 10951122, upload-time = "2026-04-09T14:06:02.236Z" }, - { url = "https://files.pythonhosted.org/packages/f2/c0/4ac978fe874d0618c7da647862afe697b281c2806f13ce904ad652fa87e4/ruff-0.15.10-py3-none-macosx_11_0_arm64.whl", hash = "sha256:93cc06a19e5155b4441dd72808fdf84290d84ad8a39ca3b0f994363ade4cebb1", size = 10314005, upload-time = "2026-04-09T14:06:00.026Z" }, - { url = "https://files.pythonhosted.org/packages/da/73/c209138a5c98c0d321266372fc4e33ad43d506d7e5dd817dd89b60a8548f/ruff-0.15.10-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:83e1dd04312997c99ea6965df66a14fb4f03ba978564574ffc68b0d61fd3989e", size = 10643450, upload-time = "2026-04-09T14:05:42.137Z" }, - { url = "https://files.pythonhosted.org/packages/ec/76/0deec355d8ec10709653635b1f90856735302cb8e149acfdf6f82a5feb70/ruff-0.15.10-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:8154d43684e4333360fedd11aaa40b1b08a4e37d8ffa9d95fee6fa5b37b6fab1", size = 10379597, upload-time = "2026-04-09T14:05:49.984Z" }, - { url = "https://files.pythonhosted.org/packages/dc/be/86bba8fc8798c081e28a4b3bb6d143ccad3fd5f6f024f02002b8f08a9fa3/ruff-0.15.10-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ab88715f3a6deb6bde6c227f3a123410bec7b855c3ae331b4c006189e895cef", size = 11146645, upload-time = "2026-04-09T14:06:12.246Z" }, - { url = "https://files.pythonhosted.org/packages/a8/89/140025e65911b281c57be1d385ba1d932c2366ca88ae6663685aed8d4881/ruff-0.15.10-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:a768ff5969b4f44c349d48edf4ab4f91eddb27fd9d77799598e130fb628aa158", size = 12030289, upload-time = "2026-04-09T14:06:04.776Z" }, - { url = "https://files.pythonhosted.org/packages/88/de/ddacca9545a5e01332567db01d44bd8cf725f2db3b3d61a80550b48308ea/ruff-0.15.10-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:0ee3ef42dab7078bda5ff6a1bcba8539e9857deb447132ad5566a038674540d0", size = 11496266, upload-time = "2026-04-09T14:05:55.485Z" }, - { url = "https://files.pythonhosted.org/packages/bc/bb/7ddb00a83760ff4a83c4e2fc231fd63937cc7317c10c82f583302e0f6586/ruff-0.15.10-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:51cb8cc943e891ba99989dd92d61e29b1d231e14811db9be6440ecf25d5c1609", size = 11256418, upload-time = "2026-04-09T14:05:57.69Z" }, - { url = "https://files.pythonhosted.org/packages/dc/8d/55de0d35aacf6cd50b6ee91ee0f291672080021896543776f4170fc5c454/ruff-0.15.10-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:e59c9bdc056a320fb9ea1700a8d591718b8faf78af065484e801258d3a76bc3f", size = 11288416, upload-time = "2026-04-09T14:05:44.695Z" }, - { url = "https://files.pythonhosted.org/packages/68/cf/9438b1a27426ec46a80e0a718093c7f958ef72f43eb3111862949ead3cc1/ruff-0.15.10-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:136c00ca2f47b0018b073f28cb5c1506642a830ea941a60354b0e8bc8076b151", size = 10621053, upload-time = "2026-04-09T14:05:52.782Z" }, - { url = "https://files.pythonhosted.org/packages/4c/50/e29be6e2c135e9cd4cb15fbade49d6a2717e009dff3766dd080fcb82e251/ruff-0.15.10-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:8b80a2f3c9c8a950d6237f2ca12b206bccff626139be9fa005f14feb881a1ae8", size = 10378302, upload-time = "2026-04-09T14:06:14.361Z" }, - { url = "https://files.pythonhosted.org/packages/18/2f/e0b36a6f99c51bb89f3a30239bc7bf97e87a37ae80aa2d6542d6e5150364/ruff-0.15.10-py3-none-musllinux_1_2_i686.whl", hash = "sha256:e3e53c588164dc025b671c9df2462429d60357ea91af7e92e9d56c565a9f1b07", size = 10850074, upload-time = "2026-04-09T14:06:16.581Z" }, - { url = "https://files.pythonhosted.org/packages/11/08/874da392558ce087a0f9b709dc6ec0d60cbc694c1c772dab8d5f31efe8cb/ruff-0.15.10-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:b0c52744cf9f143a393e284125d2576140b68264a93c6716464e129a3e9adb48", size = 11358051, upload-time = "2026-04-09T14:06:18.948Z" }, - { url = "https://files.pythonhosted.org/packages/e4/46/602938f030adfa043e67112b73821024dc79f3ab4df5474c25fa4c1d2d14/ruff-0.15.10-py3-none-win32.whl", hash = "sha256:d4272e87e801e9a27a2e8df7b21011c909d9ddd82f4f3281d269b6ba19789ca5", size = 10588964, upload-time = "2026-04-09T14:06:07.14Z" }, - { url = "https://files.pythonhosted.org/packages/25/b6/261225b875d7a13b33a6d02508c39c28450b2041bb01d0f7f1a83d569512/ruff-0.15.10-py3-none-win_amd64.whl", hash = "sha256:28cb32d53203242d403d819fd6983152489b12e4a3ae44993543d6fe62ab42ed", size = 11745044, upload-time = "2026-04-09T14:05:39.473Z" }, - { url = "https://files.pythonhosted.org/packages/58/ed/dea90a65b7d9e69888890fb14c90d7f51bf0c1e82ad800aeb0160e4bacfd/ruff-0.15.10-py3-none-win_arm64.whl", hash = "sha256:601d1610a9e1f1c2165a4f561eeaa2e2ea1e97f3287c5aa258d3dab8b57c6188", size = 11035607, upload-time = "2026-04-09T14:05:47.593Z" }, + { url = "https://files.pythonhosted.org/packages/02/1e/6aca3427f751295ab011828e15e9bf452200ac74484f1db4be0197b8170b/ruff-0.15.11-py3-none-linux_armv6l.whl", hash = "sha256:e927cfff503135c558eb581a0c9792264aae9507904eb27809cdcff2f2c847b7", size = 10607943, upload-time = "2026-04-16T18:46:05.967Z" }, + { url = 
"https://files.pythonhosted.org/packages/e7/26/1341c262e74f36d4e84f3d6f4df0ac68cd53331a66bfc5080daa17c84c0b/ruff-0.15.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:7a1b5b2938d8f890b76084d4fa843604d787a912541eae85fd7e233398bbb73e", size = 10988592, upload-time = "2026-04-16T18:46:00.742Z" }, + { url = "https://files.pythonhosted.org/packages/03/71/850b1d6ffa9564fbb6740429bad53df1094082fe515c8c1e74b6d8d05f18/ruff-0.15.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d4176f3d194afbdaee6e41b9ccb1a2c287dba8700047df474abfbe773825d1cb", size = 10338501, upload-time = "2026-04-16T18:46:03.723Z" }, + { url = "https://files.pythonhosted.org/packages/f2/11/cc1284d3e298c45a817a6aadb6c3e1d70b45c9b36d8d9cce3387b495a03a/ruff-0.15.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3b17c886fb88203ced3afe7f14e8d5ae96e9d2f4ccc0ee66aa19f2c2675a27e4", size = 10670693, upload-time = "2026-04-16T18:46:41.941Z" }, + { url = "https://files.pythonhosted.org/packages/ce/9e/f8288b034ab72b371513c13f9a41d9ba3effac54e24bfb467b007daee2ca/ruff-0.15.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:49fafa220220afe7758a487b048de4c8f9f767f37dfefad46b9dd06759d003eb", size = 10416177, upload-time = "2026-04-16T18:46:21.717Z" }, + { url = "https://files.pythonhosted.org/packages/85/71/504d79abfd3d92532ba6bbe3d1c19fada03e494332a59e37c7c2dabae427/ruff-0.15.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f2ab8427e74a00d93b8bda1307b1e60970d40f304af38bccb218e056c220120d", size = 11221886, upload-time = "2026-04-16T18:46:15.086Z" }, + { url = "https://files.pythonhosted.org/packages/43/5a/947e6ab7a5ad603d65b474be15a4cbc6d29832db5d762cd142e4e3a74164/ruff-0.15.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:195072c0c8e1fc8f940652073df082e37a5d9cb43b4ab1e4d0566ab8977a13b7", size = 12075183, upload-time = "2026-04-16T18:46:07.944Z" }, + { url = "https://files.pythonhosted.org/packages/9f/a1/0b7bb6268775fdd3a0818aee8efd8f5b4e231d24dd4d528ced2534023182/ruff-0.15.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3a0996d486af3920dec930a2e7daed4847dfc12649b537a9335585ada163e9e", size = 11516575, upload-time = "2026-04-16T18:46:31.687Z" }, + { url = "https://files.pythonhosted.org/packages/30/c3/bb5168fc4d233cc06e95f482770d0f3c87945a0cd9f614b90ea8dc2f2833/ruff-0.15.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bef2cb556d509259f1fe440bb9cd33c756222cf0a7afe90d15edf0866702431", size = 11306537, upload-time = "2026-04-16T18:46:36.988Z" }, + { url = "https://files.pythonhosted.org/packages/e4/92/4cfae6441f3967317946f3b788136eecf093729b94d6561f963ed810c82e/ruff-0.15.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:030d921a836d7d4a12cf6e8d984a88b66094ccb0e0f17ddd55067c331191bf19", size = 11296813, upload-time = "2026-04-16T18:46:24.182Z" }, + { url = "https://files.pythonhosted.org/packages/43/26/972784c5dde8313acde8ac71ba8ac65475b85db4a2352a76c9934361f9bc/ruff-0.15.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0e783b599b4577788dbbb66b9addcef87e9a8832f4ce0c19e34bf55543a2f890", size = 10633136, upload-time = "2026-04-16T18:46:39.802Z" }, + { url = "https://files.pythonhosted.org/packages/5b/53/3985a4f185020c2f367f2e08a103032e12564829742a1b417980ce1514a0/ruff-0.15.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:ae90592246625ba4a34349d68ec28d4400d75182b71baa196ddb9f82db025ef5", size = 10424701, upload-time = "2026-04-16T18:46:10.381Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/57/bf0dfb32241b56c83bb663a826133da4bf17f682ba8c096973065f6e6a68/ruff-0.15.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:1f111d62e3c983ed20e0ca2e800f8d77433a5b1161947df99a5c2a3fb60514f0", size = 10873887, upload-time = "2026-04-16T18:46:29.157Z" }, + { url = "https://files.pythonhosted.org/packages/02/05/e48076b2a57dc33ee8c7a957296f97c744ca891a8ffb4ffb1aaa3b3f517d/ruff-0.15.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:06f483d6646f59eaffba9ae30956370d3a886625f511a3108994000480621d1c", size = 11404316, upload-time = "2026-04-16T18:46:19.462Z" }, + { url = "https://files.pythonhosted.org/packages/88/27/0195d15fe7a897cbcba0904792c4b7c9fdd958456c3a17d2ea6093716a9a/ruff-0.15.11-py3-none-win32.whl", hash = "sha256:476a2aa56b7da0b73a3ee80b6b2f0e19cce544245479adde7baa65466664d5f3", size = 10655535, upload-time = "2026-04-16T18:46:12.47Z" }, + { url = "https://files.pythonhosted.org/packages/3a/5e/c927b325bd4c1d3620211a4b96f47864633199feed60fa936025ab27e090/ruff-0.15.11-py3-none-win_amd64.whl", hash = "sha256:8b6756d88d7e234fb0c98c91511aae3cd519d5e3ed271cae31b20f39cb2a12a3", size = 11779692, upload-time = "2026-04-16T18:46:17.268Z" }, + { url = "https://files.pythonhosted.org/packages/63/b6/aeadee5443e49baa2facd51131159fd6301cc4ccfc1541e4df7b021c37dd/ruff-0.15.11-py3-none-win_arm64.whl", hash = "sha256:063fed18cc1bbe0ee7393957284a6fe8b588c6a406a285af3ee3f46da2391ee4", size = 11032614, upload-time = "2026-04-16T18:46:34.487Z" }, ] [[package]] @@ -5268,6 +5418,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4c/a7/563b2d8fb7edc07320bf69ac6a7eedcd7a1a9d663a6bb90a4d9bd2eda5f7/unpaddedbase64-2.1.0-py3-none-any.whl", hash = "sha256:485eff129c30175d2cd6f0cd8d2310dff51e666f7f36175f738d75dfdbd0b1c6", size = 6083, upload-time = "2021-03-09T11:35:46.7Z" }, ] +[[package]] +name = "uritemplate" +version = "4.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/98/60/f174043244c5306c9988380d2cb10009f91563fc4b31293d27e17201af56/uritemplate-4.2.0.tar.gz", hash = "sha256:480c2ed180878955863323eea31b0ede668795de182617fef9c6ca09e6ec9d0e", size = 33267, upload-time = "2025-06-02T15:12:06.318Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a9/99/3ae339466c9183ea5b8ae87b34c0b897eda475d2aec2307cae60e5cd4f29/uritemplate-4.2.0-py3-none-any.whl", hash = "sha256:962201ba1c4edcab02e60f9a0d3821e82dfc5d2d6662a21abd533879bdb8a686", size = 11488, upload-time = "2025-06-02T15:12:03.405Z" }, +] + [[package]] name = "urllib3" version = "2.6.3" diff --git a/website/docs/developer-guide/adding-providers.md b/website/docs/developer-guide/adding-providers.md index 793d0354d1..5ec127d663 100644 --- a/website/docs/developer-guide/adding-providers.md +++ b/website/docs/developer-guide/adding-providers.md @@ -93,6 +93,42 @@ This path includes everything from Path A plus: 11. `run_agent.py` 12. `pyproject.toml` if a provider SDK is required +## Fast path: Simple API-key providers + +If your provider is just an OpenAI-compatible endpoint that authenticates with a single API key, you do not need to touch `auth.py`, `runtime_provider.py`, `main.py`, or any of the other files in the full checklist below. + +All you need is: + +1. A file in `providers/` (e.g. `providers/myprovider.py`) that calls `register_provider()` with the provider config. +2. That's it. `auth.py` auto-registers every file in `providers/` at startup via a module-level import sweep. 
+ +When you add a `providers/*.py` file and call `register_provider()`, the following wire up automatically: + +1. `PROVIDER_REGISTRY` entry in `auth.py` (credential resolution, env-var lookup) +2. `api_mode` set to `chat_completions` +3. `base_url` sourced from the config or the declared env var +4. `env_vars` checked in priority order for the API key +5. `fallback_models` list registered for the provider +6. `--provider` CLI flag accepts the provider id +7. `hermes model` menu includes the provider +8. `hermes setup` wizard delegates to `main.py` automatically +9. `provider:model` alias syntax works +10. Runtime resolver returns the correct `base_url` and `api_key` +11. `HERMES_INFERENCE_PROVIDER` env-var override accepts the provider id +12. Fallback model activation can switch into the provider cleanly + +See `providers/nvidia.py` or `providers/gmi.py` as a template. + +## Full path: OAuth and complex providers + +Use the full checklist below when your provider needs any of the following: + +- OAuth or token refresh (Nous Portal, Codex, Google Gemini, Qwen Portal, Copilot) +- A non-OpenAI API shape that requires a new adapter (Anthropic Messages, Codex Responses) +- Custom endpoint detection or multi-region probing (z.ai, Kimi) +- A curated static model catalog or live `/models` fetch +- Provider-specific `hermes model` menu entries with bespoke auth flows + ## Step 1: Pick one canonical provider id Choose a single provider id and use it everywhere. diff --git a/website/docs/developer-guide/provider-runtime.md b/website/docs/developer-guide/provider-runtime.md index bf9abe0ce5..beece151cc 100644 --- a/website/docs/developer-guide/provider-runtime.md +++ b/website/docs/developer-guide/provider-runtime.md @@ -20,6 +20,9 @@ Primary implementation: - `hermes_cli/auth.py` — provider registry, `resolve_provider()` - `hermes_cli/model_switch.py` — shared `/model` switch pipeline (CLI + gateway) - `agent/auxiliary_client.py` — auxiliary model routing +- `providers/` — declarative source for `api_mode`, `base_url`, `env_vars`, `fallback_models` (auto-registered into `auth.py` `PROVIDER_REGISTRY` at startup) + +`get_provider_profile()` in `providers/` returns the registered `ProviderProfile` instance for a given provider id. `runtime_provider.py` calls this at resolution time to get the canonical `base_url`, `env_vars` priority list, `api_mode`, and `fallback_models` without needing to duplicate that data in multiple files. Adding a new `providers/*.py` file that calls `register_provider()` is enough for `runtime_provider.py` to pick it up — no branch needed in the resolver itself. If you are trying to add a new first-class inference provider, read [Adding Providers](./adding-providers.md) alongside this page. diff --git a/website/docs/integrations/providers.md b/website/docs/integrations/providers.md index c91bf6e007..a1f4d2201a 100644 --- a/website/docs/integrations/providers.md +++ b/website/docs/integrations/providers.md @@ -423,6 +423,44 @@ model: For on-prem deployments (DGX Spark, local GPU), set `NVIDIA_BASE_URL=http://localhost:8000/v1`. NIM exposes the same OpenAI-compatible chat completions API as build.nvidia.com, so switching between cloud and local is a one-line env-var change. ::: +### GMI Cloud + +Open and reasoning models via [GMI Cloud](https://inference.gmi.ai) — OpenAI-compatible API, API key authentication.
+ +```bash +# GMI Cloud +hermes chat --provider gmi --model deepseek-ai/DeepSeek-R1 +# Requires: GMI_API_KEY in ~/.hermes/.env +``` + +Or set it permanently in `config.yaml`: +```yaml +model: + provider: "gmi" + default: "deepseek-ai/DeepSeek-R1" +``` + +The base URL can be overridden with `GMI_BASE_URL` (default: `https://api.gmi.ai/v1`). + +### StepFun + +Step-series models via [StepFun](https://platform.stepfun.com) — OpenAI-compatible API, API key authentication. + +```bash +# StepFun +hermes chat --provider stepfun --model step-3-mini +# Requires: STEPFUN_API_KEY in ~/.hermes/.env +``` + +Or set it permanently in `config.yaml`: +```yaml +model: + provider: "stepfun" + default: "step-3-mini" +``` + +The base URL can be overridden with `STEPFUN_BASE_URL` (default: `https://api.stepfun.com/v1`). + ### Hugging Face Inference Providers [Hugging Face Inference Providers](https://huggingface.co/docs/inference-providers) routes to 20+ open models through a unified OpenAI-compatible endpoint (`router.huggingface.co/v1`). Requests are automatically routed to the fastest available backend (Groq, Together, SambaNova, etc.) with automatic failover. @@ -1178,7 +1216,7 @@ fallback_model: When activated, the fallback swaps the model and provider mid-session without losing your conversation. It fires **at most once** per session. -Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `custom`. +Supported providers: `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `gemini`, `google-gemini-cli`, `qwen-oauth`, `huggingface`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `deepseek`, `nvidia`, `gmi`, `stepfun`, `xai`, `ollama-cloud`, `bedrock`, `ai-gateway`, `opencode-zen`, `opencode-go`, `kilocode`, `xiaomi`, `arcee`, `alibaba`, `custom`. :::tip Fallback is configured exclusively through `config.yaml` — there are no environment variables for it. For full details on when it triggers, supported providers, and how it interacts with auxiliary tasks and delegation, see [Fallback Providers](/docs/user-guide/features/fallback-providers). diff --git a/website/docs/reference/environment-variables.md b/website/docs/reference/environment-variables.md index f324edf160..ad1f484bc7 100644 --- a/website/docs/reference/environment-variables.md +++ b/website/docs/reference/environment-variables.md @@ -65,6 +65,10 @@ All variables go in `~/.hermes/.env`. 
You can also set them with `hermes config | `DEEPSEEK_BASE_URL` | Custom DeepSeek API base URL | | `NVIDIA_API_KEY` | NVIDIA NIM API key — Nemotron and open models ([build.nvidia.com](https://build.nvidia.com)) | | `NVIDIA_BASE_URL` | Override NVIDIA base URL (default: `https://integrate.api.nvidia.com/v1`; set to `http://localhost:8000/v1` for a local NIM endpoint) | +| `GMI_API_KEY` | GMI Cloud API key — open and reasoning models ([inference.gmi.ai](https://inference.gmi.ai)) | +| `GMI_BASE_URL` | Override GMI Cloud base URL (default: `https://api.gmi.ai/v1`) | +| `STEPFUN_API_KEY` | StepFun API key — Step-series models ([platform.stepfun.com](https://platform.stepfun.com)) | +| `STEPFUN_BASE_URL` | Override StepFun base URL (default: `https://api.stepfun.com/v1`) | | `OLLAMA_API_KEY` | Ollama Cloud API key — managed Ollama catalog without local GPU ([ollama.com/settings/keys](https://ollama.com/settings/keys)) | | `OLLAMA_BASE_URL` | Override Ollama Cloud base URL (default: `https://ollama.com/v1`) | | `XAI_API_KEY` | xAI (Grok) API key for chat + TTS ([console.x.ai](https://console.x.ai/)) | @@ -91,7 +95,7 @@ For native Anthropic auth, Hermes prefers Claude Code's own credential files whe | Variable | Description | |----------|-------------| -| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) | +| `HERMES_INFERENCE_PROVIDER` | Override provider selection: `auto`, `custom`, `openrouter`, `nous`, `openai-codex`, `copilot`, `copilot-acp`, `anthropic`, `huggingface`, `gemini`, `zai`, `kimi-coding`, `kimi-coding-cn`, `minimax`, `minimax-cn`, `kilocode`, `xiaomi`, `arcee`, `gmi`, `stepfun`, `alibaba`, `deepseek`, `nvidia`, `ollama-cloud`, `xai` (alias `grok`), `google-gemini-cli`, `qwen-oauth`, `bedrock`, `opencode-zen`, `opencode-go`, `ai-gateway` (default: `auto`) | | `HERMES_PORTAL_BASE_URL` | Override Nous Portal URL (for development/testing) | | `NOUS_INFERENCE_BASE_URL` | Override Nous inference API URL | | `HERMES_NOUS_MIN_KEY_TTL_SECONDS` | Min agent key TTL before re-mint (default: 1800 = 30min) | diff --git a/website/docs/user-guide/features/fallback-providers.md b/website/docs/user-guide/features/fallback-providers.md index a0d699dfb2..b922bd7d6e 100644 --- a/website/docs/user-guide/features/fallback-providers.md +++ b/website/docs/user-guide/features/fallback-providers.md @@ -48,6 +48,8 @@ Both `provider` and `model` are **required**. If either is missing, the fallback | MiniMax (China) | `minimax-cn` | `MINIMAX_CN_API_KEY` | | DeepSeek | `deepseek` | `DEEPSEEK_API_KEY` | | NVIDIA NIM | `nvidia` | `NVIDIA_API_KEY` (optional: `NVIDIA_BASE_URL`) | +| GMI Cloud | `gmi` | `GMI_API_KEY` (optional: `GMI_BASE_URL`) | +| StepFun | `stepfun` | `STEPFUN_API_KEY` (optional: `STEPFUN_BASE_URL`) | | Ollama Cloud | `ollama-cloud` | `OLLAMA_API_KEY` | | Google Gemini (OAuth) | `google-gemini-cli` | `hermes model` (Google OAuth; optional: `HERMES_GEMINI_PROJECT_ID`) | | Google AI Studio | `gemini` | `GOOGLE_API_KEY` (alias: `GEMINI_API_KEY`) |