diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py
index 46afe67f3b..18c384a9c2 100644
--- a/agent/anthropic_adapter.py
+++ b/agent/anthropic_adapter.py
@@ -266,6 +266,14 @@ def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
     return True  # Any other endpoint is a third-party proxy
 
 
+def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
+    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
+    normalized = _normalize_base_url_text(base_url)
+    if not normalized:
+        return False
+    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
+
+
 def _requires_bearer_auth(base_url: str | None) -> bool:
     """Return True for Anthropic-compatible providers that require Bearer auth.
 
@@ -323,9 +331,18 @@ def build_anthropic_client(api_key: str, base_url: str = None, timeout: Optional
         kwargs["base_url"] = normalized_base_url
 
     common_betas = _common_betas_for_base_url(normalized_base_url)
-    if _requires_bearer_auth(normalized_base_url):
+    if _is_kimi_coding_endpoint(base_url):
+        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
+        # to be recognized as a valid Coding Agent. Without it, the endpoint
+        # returns 403. Check this BEFORE _requires_bearer_auth since both
+        # match api.kimi.com/coding.
+        kwargs["api_key"] = api_key
+        kwargs["default_headers"] = {
+            "User-Agent": "claude-code/0.1.0",
+            **({"anthropic-beta": ",".join(common_betas)} if common_betas else {}),
+        }
+    elif _requires_bearer_auth(normalized_base_url):
         # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
         # Authorization: Bearer even for regular API keys. Route those endpoints
         # through auth_token so the SDK sends Bearer auth instead of x-api-key.
         # Check this before OAuth token shape detection because MiniMax secrets do
         # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
@@ -1409,11 +1426,25 @@ def build_anthropic_kwargs(
     # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
     # not adaptive). Haiku does NOT support extended thinking — skip entirely.
     #
+    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
+    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
+    # validates the message history and requires every prior assistant
+    # tool-call message to carry OpenAI-style ``reasoning_content``. The
+    # Anthropic path never populates that field, and
+    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
+    # on third-party endpoints — so the request fails with HTTP 400
+    # "thinking is enabled but reasoning_content is missing in assistant
+    # tool call message at index N". Kimi's reasoning is driven server-side
+    # on the /coding route, so skip Anthropic's thinking parameter entirely
+    # for that host. (Kimi on chat_completions enables thinking via
+    # extra_body in the ChatCompletionsTransport — see #13503.)
+    #
     # On 4.7+ the `thinking.display` field defaults to "omitted", which
     # silently hides reasoning text that Hermes surfaces in its CLI. We
     # request "summarized" so the reasoning blocks stay populated — matching
     # 4.6 behavior and preserving the activity-feed UX during long tool runs.
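For orientation, a minimal sketch of the client this branch ends up building, using only the public anthropic SDK constructor (api_key, base_url, and default_headers are real SDK kwargs; the URL and UA strings are the ones hard-coded above):

from anthropic import Anthropic

def sketch_kimi_coding_client(api_key: str) -> Anthropic:
    # No /v1 suffix on the base URL: the SDK appends /v1/messages itself,
    # so "/coding" resolves to "/coding/v1/messages" (see KIMI_CODE_BASE_URL).
    return Anthropic(
        api_key=api_key,
        base_url="https://api.kimi.com/coding",
        default_headers={"User-Agent": "claude-code/0.1.0"},  # 403 without it
    )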
- if reasoning_config and isinstance(reasoning_config, dict): + _is_kimi_coding = _is_kimi_coding_endpoint(base_url) + if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding: if reasoning_config.get("enabled") is not False and "haiku" not in model.lower(): effort = str(reasoning_config.get("effort", "medium")).lower() budget = THINKING_BUDGET.get(effort, 8000) diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py index e4223771fd..b164b54d20 100644 --- a/agent/auxiliary_client.py +++ b/agent/auxiliary_client.py @@ -185,8 +185,6 @@ auxiliary_is_nous: bool = False # Default auxiliary models per provider _OPENROUTER_MODEL = "google/gemini-3-flash-preview" _NOUS_MODEL = "google/gemini-3-flash-preview" -_NOUS_FREE_TIER_VISION_MODEL = "xiaomi/mimo-v2-omni" -_NOUS_FREE_TIER_AUX_MODEL = "xiaomi/mimo-v2-pro" _NOUS_DEFAULT_BASE_URL = "https://inference-api.nousresearch.com/v1" _ANTHROPIC_DEFAULT_BASE_URL = "https://api.anthropic.com" _AUTH_JSON_PATH = get_hermes_home() / "auth.json" @@ -731,6 +729,33 @@ def _nous_base_url() -> str: return os.getenv("NOUS_INFERENCE_BASE_URL", _NOUS_DEFAULT_BASE_URL) +def _resolve_nous_runtime_api(*, force_refresh: bool = False) -> Optional[tuple[str, str]]: + """Return fresh Nous runtime credentials when available. + + This mirrors the main agent's 401 recovery path and keeps auxiliary + clients aligned with the singleton auth store + mint flow instead of + relying only on whatever raw tokens happen to be sitting in auth.json + or the credential pool. + """ + try: + from hermes_cli.auth import resolve_nous_runtime_credentials + + creds = resolve_nous_runtime_credentials( + min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), + timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), + force_mint=force_refresh, + ) + except Exception as exc: + logger.debug("Auxiliary Nous runtime credential resolution failed: %s", exc) + return None + + api_key = str(creds.get("api_key") or "").strip() + base_url = str(creds.get("base_url") or "").strip().rstrip("/") + if not api_key or not base_url: + return None + return api_key, base_url + + def _read_codex_access_token() -> Optional[str]: """Read a valid, non-expired Codex OAuth access token from Hermes auth store. 
@@ -826,7 +851,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeCli return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} if base_url_host_matches(base_url, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -852,7 +877,7 @@ def _resolve_api_key_provider() -> Tuple[Optional[Union[OpenAI, "GeminiNativeCli return GeminiNativeClient(api_key=api_key, base_url=base_url), model extra = {} if base_url_host_matches(base_url, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -902,29 +927,50 @@ def _try_nous(vision: bool = False) -> Tuple[Optional[OpenAI], Optional[str]]: pass nous = _read_nous_auth() - if not nous: + runtime = _resolve_nous_runtime_api(force_refresh=False) + if runtime is None and not nous: return None, None global auxiliary_is_nous auxiliary_is_nous = True logger.debug("Auxiliary client: Nous Portal") - if nous.get("source") == "pool": - model = "gemini-3-flash" - else: - model = _NOUS_MODEL - # Free-tier users can't use paid auxiliary models — use the free - # models instead: mimo-v2-omni for vision, mimo-v2-pro for text tasks. + + # Ask the Portal which model it currently recommends for this task type. + # The /api/nous/recommended-models endpoint is the authoritative source: + # it distinguishes paid vs free tier recommendations, and get_nous_recommended_aux_model + # auto-detects the caller's tier via check_nous_free_tier(). Fall back to + # _NOUS_MODEL (google/gemini-3-flash-preview) when the Portal is unreachable + # or returns a null recommendation for this task type. 
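Condensed, the credential precedence _try_nous now applies (a sketch; helper names are the ones in this file, and the runtime tuple shape matches _resolve_nous_runtime_api above):

def sketch_nous_credentials(runtime, nous_auth):
    # Freshly minted runtime creds win; raw auth.json tokens are the fallback.
    if runtime is not None:
        return runtime  # (api_key, base_url)
    if nous_auth:
        base = str(nous_auth.get("inference_base_url") or _nous_base_url())
        return _nous_api_key(nous_auth), base.rstrip("/")
    return None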
+ model = _NOUS_MODEL try: - from hermes_cli.models import check_nous_free_tier - if check_nous_free_tier(): - model = _NOUS_FREE_TIER_VISION_MODEL if vision else _NOUS_FREE_TIER_AUX_MODEL - logger.debug("Free-tier Nous account — using %s for auxiliary/%s", - model, "vision" if vision else "text") - except Exception: - pass + from hermes_cli.models import get_nous_recommended_aux_model + recommended = get_nous_recommended_aux_model(vision=vision) + if recommended: + model = recommended + logger.debug( + "Auxiliary/%s: using Portal-recommended model %s", + "vision" if vision else "text", model, + ) + else: + logger.debug( + "Auxiliary/%s: no Portal recommendation, falling back to %s", + "vision" if vision else "text", model, + ) + except Exception as exc: + logger.debug( + "Auxiliary/%s: recommended-models lookup failed (%s); " + "falling back to %s", + "vision" if vision else "text", exc, model, + ) + + if runtime is not None: + api_key, base_url = runtime + else: + api_key = _nous_api_key(nous or {}) + base_url = str((nous or {}).get("inference_base_url") or _nous_base_url()).rstrip("/") return ( OpenAI( - api_key=_nous_api_key(nous), - base_url=str(nous.get("inference_base_url") or _nous_base_url()).rstrip("/"), + api_key=api_key, + base_url=base_url, ), model, ) @@ -1268,6 +1314,15 @@ def _is_connection_error(exc: Exception) -> bool: return False +def _is_auth_error(exc: Exception) -> bool: + """Detect auth failures that should trigger provider-specific refresh.""" + status = getattr(exc, "status_code", None) + if status == 401: + return True + err_lower = str(exc).lower() + return "error code: 401" in err_lower or "authenticationerror" in type(exc).__name__.lower() + + def _try_payment_fallback( failed_provider: str, task: str = None, @@ -1451,7 +1506,7 @@ def _to_async_client(sync_client, model: str): async_kwargs["default_headers"] = copilot_default_headers() elif base_url_host_matches(sync_base_url, "api.kimi.com"): - async_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + async_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} return AsyncOpenAI(**async_kwargs), model @@ -1575,7 +1630,13 @@ def resolve_provider_client( # ── Nous Portal (OAuth) ────────────────────────────────────────── if provider == "nous": - client, default = _try_nous() + # Detect vision tasks: either explicit model override from + # _PROVIDER_VISION_MODELS, or caller passed a known vision model. 
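A quick sketch of the 401 shapes _is_auth_error() is written to catch. FakeAuthError is hypothetical; the OpenAI SDK's AuthenticationError carries a matching class name and status_code == 401:

class FakeAuthError(Exception):
    status_code = 401  # SDK-style attribute

assert _is_auth_error(FakeAuthError("invalid key"))            # status match
assert _is_auth_error(Exception("Error code: 401 - expired"))  # message match
assert not _is_auth_error(Exception("Error code: 429"))        # not an auth error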
+ _is_vision = ( + model in _PROVIDER_VISION_MODELS.values() + or (model or "").strip().lower() == "mimo-v2-omni" + ) + client, default = _try_nous(vision=_is_vision) if client is None: logger.warning("resolve_provider_client: nous requested " "but Nous Portal not configured (run: hermes auth)") @@ -1632,7 +1693,7 @@ def resolve_provider_client( ) extra = {} if base_url_host_matches(custom_base, "api.kimi.com"): - extra["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + extra["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(custom_base, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers extra["default_headers"] = copilot_default_headers() @@ -1739,7 +1800,7 @@ def resolve_provider_client( # Provider-specific headers headers = {} if base_url_host_matches(base_url, "api.kimi.com"): - headers["User-Agent"] = "KimiCLI/1.30.0" + headers["User-Agent"] = "claude-code/0.1.0" elif base_url_host_matches(base_url, "api.githubcopilot.com"): from hermes_cli.models import copilot_default_headers @@ -1971,24 +2032,35 @@ def resolve_vision_provider_client( # _PROVIDER_VISION_MODELS provides per-provider vision model # overrides when the provider has a dedicated multimodal model # that differs from the chat model (e.g. xiaomi → mimo-v2-omni, - # zai → glm-5v-turbo). + # zai → glm-5v-turbo). Nous is the exception: it has a dedicated + # strict vision backend with tier-aware defaults, so it must not + # fall through to the user's text chat model here. # 2. OpenRouter (vision-capable aggregator fallback) # 3. Nous Portal (vision-capable aggregator fallback) # 4. Stop main_provider = _read_main_provider() main_model = _read_main_model() if main_provider and main_provider not in ("auto", ""): - vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) - rpc_client, rpc_model = resolve_provider_client( - main_provider, vision_model, - api_mode=resolved_api_mode) - if rpc_client is not None: - logger.info( - "Vision auto-detect: using main provider %s (%s)", - main_provider, rpc_model or vision_model, - ) - return _finalize( - main_provider, rpc_client, rpc_model or vision_model) + if main_provider == "nous": + sync_client, default_model = _resolve_strict_vision_backend(main_provider) + if sync_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, default_model or resolved_model or main_model, + ) + return _finalize(main_provider, sync_client, default_model) + else: + vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model) + rpc_client, rpc_model = resolve_provider_client( + main_provider, vision_model, + api_mode=resolved_api_mode) + if rpc_client is not None: + logger.info( + "Vision auto-detect: using main provider %s (%s)", + main_provider, rpc_model or vision_model, + ) + return _finalize( + main_provider, rpc_client, rpc_model or vision_model) # Fall back through aggregators (uses their dedicated vision model, # not the user's main model) when main provider has no client. 
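The routing decision above, condensed into a sketch (helper names as used in this file; error handling, logging, and the api_mode plumbing omitted):

def sketch_vision_route(main_provider: str, main_model: str):
    if main_provider == "nous":
        # Dedicated tier-aware vision backend; never the user's text model.
        return _resolve_strict_vision_backend(main_provider)
    vision_model = _PROVIDER_VISION_MODELS.get(main_provider, main_model)
    return resolve_provider_client(main_provider, vision_model)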
@@ -2063,6 +2135,76 @@ _client_cache_lock = threading.Lock() _CLIENT_CACHE_MAX_SIZE = 64 # safety belt — evict oldest when exceeded +def _client_cache_key( + provider: str, + *, + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> tuple: + runtime = _normalize_main_runtime(main_runtime) + runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else () + return (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key) + + +def _store_cached_client(cache_key: tuple, client: Any, default_model: Optional[str], *, bound_loop: Any = None) -> None: + with _client_cache_lock: + old_entry = _client_cache.get(cache_key) + if old_entry is not None and old_entry[0] is not client: + _force_close_async_httpx(old_entry[0]) + try: + close_fn = getattr(old_entry[0], "close", None) + if callable(close_fn): + close_fn() + except Exception: + pass + _client_cache[cache_key] = (client, default_model, bound_loop) + + +def _refresh_nous_auxiliary_client( + *, + cache_provider: str, + model: Optional[str], + async_mode: bool, + base_url: Optional[str] = None, + api_key: Optional[str] = None, + api_mode: Optional[str] = None, + main_runtime: Optional[Dict[str, Any]] = None, +) -> Tuple[Optional[Any], Optional[str]]: + """Refresh Nous runtime creds, rebuild the client, and replace the cache entry.""" + runtime = _resolve_nous_runtime_api(force_refresh=True) + if runtime is None: + return None, model + + fresh_key, fresh_base_url = runtime + sync_client = OpenAI(api_key=fresh_key, base_url=fresh_base_url) + final_model = model + + current_loop = None + if async_mode: + try: + import asyncio as _aio + current_loop = _aio.get_event_loop() + except RuntimeError: + pass + client, final_model = _to_async_client(sync_client, final_model or "") + else: + client = sync_client + + cache_key = _client_cache_key( + cache_provider, + async_mode=async_mode, + base_url=base_url, + api_key=api_key, + api_mode=api_mode, + main_runtime=main_runtime, + ) + _store_cached_client(cache_key, client, final_model, bound_loop=current_loop) + return client, final_model + + def neuter_async_httpx_del() -> None: """Monkey-patch ``AsyncHttpxClientWrapper.__del__`` to be a no-op. 
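The cache key _client_cache_key() produces, illustrated (runtime fields participate only for provider="auto", per the code above; the key value is a placeholder):

key = _client_cache_key(
    "nous",
    async_mode=False,
    base_url="https://inference-api.nousresearch.com/v1",
    api_key="sk-example",
    api_mode="chat_completions",
)
# -> ("nous", False, "https://inference-api.nousresearch.com/v1",
#     "sk-example", "chat_completions", ())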
@@ -2216,8 +2358,14 @@ def _get_cached_client(
         except RuntimeError:
             pass
     runtime = _normalize_main_runtime(main_runtime)
-    runtime_key = tuple(runtime.get(field, "") for field in _MAIN_RUNTIME_FIELDS) if provider == "auto" else ()
-    cache_key = (provider, async_mode, base_url or "", api_key or "", api_mode or "", runtime_key)
+    cache_key = _client_cache_key(
+        provider,
+        async_mode=async_mode,
+        base_url=base_url,
+        api_key=api_key,
+        api_mode=api_mode,
+        main_runtime=main_runtime,
+    )
     with _client_cache_lock:
         if cache_key in _client_cache:
             cached_client, cached_default, cached_loop = _client_cache[cache_key]
@@ -2665,6 +2813,29 @@ def call_llm(
                 raise
             first_err = retry_err
 
+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_base_info, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=False,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+                main_runtime=main_runtime,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s: refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    refreshed_client.chat.completions.create(**kwargs), task)
+
         # ── Payment / credit exhaustion fallback ──────────────────────
         # When the resolved provider returns 402 or a credit-related error,
         # try alternative providers instead of giving up. This handles the
@@ -2863,6 +3034,28 @@ async def async_call_llm(
                 raise
             first_err = retry_err
 
+        # ── Nous auth refresh parity with main agent ──────────────────
+        client_is_nous = (
+            resolved_provider == "nous"
+            or base_url_host_matches(_client_base, "inference-api.nousresearch.com")
+        )
+        if _is_auth_error(first_err) and client_is_nous:
+            refreshed_client, refreshed_model = _refresh_nous_auxiliary_client(
+                cache_provider=resolved_provider or "nous",
+                model=final_model,
+                async_mode=True,
+                base_url=resolved_base_url,
+                api_key=resolved_api_key,
+                api_mode=resolved_api_mode,
+            )
+            if refreshed_client is not None:
+                logger.info("Auxiliary %s (async): refreshed Nous runtime credentials after 401, retrying",
+                            task or "call")
+                if refreshed_model and refreshed_model != kwargs.get("model"):
+                    kwargs["model"] = refreshed_model
+                return _validate_llm_response(
+                    await refreshed_client.chat.completions.create(**kwargs), task)
+
         # ── Payment / connection fallback (mirrors sync call_llm) ─────
         should_fallback = _is_payment_error(first_err) or _is_connection_error(first_err)
         is_auto = resolved_provider in ("auto", "", None)
diff --git a/agent/image_gen_provider.py b/agent/image_gen_provider.py
new file mode 100644
index 0000000000..47f65c1b34
--- /dev/null
+++ b/agent/image_gen_provider.py
@@ -0,0 +1,242 @@
+"""
+Image Generation Provider ABC
+=============================
+
+Defines the pluggable-backend interface for image generation. Providers register
+instances via ``PluginContext.register_image_gen_provider()``; the active one
+(selected via ``image_gen.provider`` in ``config.yaml``) services every
+``image_generate`` tool call.
+
+Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
+as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
+via ``plugins.enabled``).
+ +Response shape +-------------- +All providers return a dict that :func:`success_response` / :func:`error_response` +produce. The tool wrapper JSON-serializes it. Keys: + + success bool + image str | None URL or absolute file path + model str provider-specific model identifier + prompt str echoed prompt + aspect_ratio str "landscape" | "square" | "portrait" + provider str provider name (for diagnostics) + error str only when success=False + error_type str only when success=False +""" + +from __future__ import annotations + +import abc +import base64 +import datetime +import logging +import uuid +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +logger = logging.getLogger(__name__) + + +VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait") +DEFAULT_ASPECT_RATIO = "landscape" + + +# --------------------------------------------------------------------------- +# ABC +# --------------------------------------------------------------------------- + + +class ImageGenProvider(abc.ABC): + """Abstract base class for an image generation backend. + + Subclasses must implement :meth:`generate`. Everything else has sane + defaults — override only what your provider needs. + """ + + @property + @abc.abstractmethod + def name(self) -> str: + """Stable short identifier used in ``image_gen.provider`` config. + + Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``. + """ + + @property + def display_name(self) -> str: + """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``.""" + return self.name.title() + + def is_available(self) -> bool: + """Return True when this provider can service calls. + + Typically checks for a required API key. Default: True + (providers with no external dependencies are always available). + """ + return True + + def list_models(self) -> List[Dict[str, Any]]: + """Return catalog entries for ``hermes tools`` model picker. + + Each entry:: + + { + "id": "gpt-image-1.5", # required + "display": "GPT Image 1.5", # optional; defaults to id + "speed": "~10s", # optional + "strengths": "...", # optional + "price": "$...", # optional + } + + Default: empty list (provider has no user-selectable models). + """ + return [] + + def get_setup_schema(self) -> Dict[str, Any]: + """Return provider metadata for the ``hermes tools`` picker. + + Used by ``tools_config.py`` to inject this provider as a row in + the Image Generation provider list. Shape:: + + { + "name": "OpenAI", # picker label + "badge": "paid", # optional short tag + "tag": "One-line description...", # optional subtitle + "env_vars": [ # keys to prompt for + {"key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys"}, + ], + } + + Default: minimal entry derived from ``display_name``. Override to + expose API key prompts and custom badges. + """ + return { + "name": self.display_name, + "badge": "", + "tag": "", + "env_vars": [], + } + + def default_model(self) -> Optional[str]: + """Return the default model id, or None if not applicable.""" + models = self.list_models() + if models: + return models[0].get("id") + return None + + @abc.abstractmethod + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + """Generate an image. + + Implementations should return the dict from :func:`success_response` + or :func:`error_response`. 
``kwargs`` may contain forward-compat
+        parameters future versions of the schema will expose — implementations
+        should ignore unknown keys.
+        """
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+
+def resolve_aspect_ratio(value: Optional[str]) -> str:
+    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.
+
+    Invalid values are coerced rather than rejected so the tool surface is
+    forgiving of agent mistakes.
+    """
+    if not isinstance(value, str):
+        return DEFAULT_ASPECT_RATIO
+    v = value.strip().lower()
+    if v in VALID_ASPECT_RATIOS:
+        return v
+    return DEFAULT_ASPECT_RATIO
+
+
+def _images_cache_dir() -> Path:
+    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
+    from hermes_constants import get_hermes_home
+
+    path = get_hermes_home() / "cache" / "images"
+    path.mkdir(parents=True, exist_ok=True)
+    return path
+
+
+def save_b64_image(
+    b64_data: str,
+    *,
+    prefix: str = "image",
+    extension: str = "png",
+) -> Path:
+    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.
+
+    Returns the absolute :class:`Path` to the saved file.
+
+    Filename format: ``<prefix>_<ts>_<short>.<extension>``.
+    """
+    raw = base64.b64decode(b64_data)
+    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
+    short = uuid.uuid4().hex[:8]
+    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{extension}"
+    path.write_bytes(raw)
+    return path
+
+
+def success_response(
+    *,
+    image: str,
+    model: str,
+    prompt: str,
+    aspect_ratio: str,
+    provider: str,
+    extra: Optional[Dict[str, Any]] = None,
+) -> Dict[str, Any]:
+    """Build a uniform success response dict.
+
+    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
+    providers like OpenAI). Callers that need to pass through additional
+    backend-specific fields can supply ``extra``.
+    """
+    payload: Dict[str, Any] = {
+        "success": True,
+        "image": image,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
+    if extra:
+        for k, v in extra.items():
+            payload.setdefault(k, v)
+    return payload
+
+
+def error_response(
+    *,
+    error: str,
+    error_type: str = "provider_error",
+    provider: str = "",
+    model: str = "",
+    prompt: str = "",
+    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
+) -> Dict[str, Any]:
+    """Build a uniform error response dict."""
+    return {
+        "success": False,
+        "image": None,
+        "error": error,
+        "error_type": error_type,
+        "model": model,
+        "prompt": prompt,
+        "aspect_ratio": aspect_ratio,
+        "provider": provider,
+    }
diff --git a/agent/image_gen_registry.py b/agent/image_gen_registry.py
new file mode 100644
index 0000000000..715133231c
--- /dev/null
+++ b/agent/image_gen_registry.py
@@ -0,0 +1,120 @@
+"""
+Image Generation Provider Registry
+==================================
+
+Central map of registered providers. Populated by plugins at import-time via
+``PluginContext.register_image_gen_provider()``; consumed by the
+``image_generate`` tool to dispatch each call to the active backend.
+
+Active selection
+----------------
+The active provider is chosen by ``image_gen.provider`` in ``config.yaml``.
+If unset, :func:`get_active_provider` applies fallback logic:
+
+1. If exactly one provider is registered, use it.
+2. Otherwise if a provider named ``fal`` is registered, use it (legacy
+   default — matches pre-plugin behavior).
+3.
Otherwise return ``None`` (the tool surfaces a helpful error pointing + the user at ``hermes tools``). +""" + +from __future__ import annotations + +import logging +import threading +from typing import Dict, List, Optional + +from agent.image_gen_provider import ImageGenProvider + +logger = logging.getLogger(__name__) + + +_providers: Dict[str, ImageGenProvider] = {} +_lock = threading.Lock() + + +def register_provider(provider: ImageGenProvider) -> None: + """Register an image generation provider. + + Re-registration (same ``name``) overwrites the previous entry and logs + a debug message — this makes hot-reload scenarios (tests, dev loops) + behave predictably. + """ + if not isinstance(provider, ImageGenProvider): + raise TypeError( + f"register_provider() expects an ImageGenProvider instance, " + f"got {type(provider).__name__}" + ) + name = provider.name + if not isinstance(name, str) or not name.strip(): + raise ValueError("Image gen provider .name must be a non-empty string") + with _lock: + existing = _providers.get(name) + _providers[name] = provider + if existing is not None: + logger.debug("Image gen provider '%s' re-registered (was %r)", name, type(existing).__name__) + else: + logger.debug("Registered image gen provider '%s' (%s)", name, type(provider).__name__) + + +def list_providers() -> List[ImageGenProvider]: + """Return all registered providers, sorted by name.""" + with _lock: + items = list(_providers.values()) + return sorted(items, key=lambda p: p.name) + + +def get_provider(name: str) -> Optional[ImageGenProvider]: + """Return the provider registered under *name*, or None.""" + if not isinstance(name, str): + return None + with _lock: + return _providers.get(name.strip()) + + +def get_active_provider() -> Optional[ImageGenProvider]: + """Resolve the currently-active provider. + + Reads ``image_gen.provider`` from config.yaml; falls back per the + module docstring. + """ + configured: Optional[str] = None + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + if isinstance(section, dict): + raw = section.get("provider") + if isinstance(raw, str) and raw.strip(): + configured = raw.strip() + except Exception as exc: + logger.debug("Could not read image_gen.provider from config: %s", exc) + + with _lock: + snapshot = dict(_providers) + + if configured: + provider = snapshot.get(configured) + if provider is not None: + return provider + logger.debug( + "image_gen.provider='%s' configured but not registered; falling back", + configured, + ) + + # Fallback: single-provider case + if len(snapshot) == 1: + return next(iter(snapshot.values())) + + # Fallback: prefer legacy FAL for backward compat + if "fal" in snapshot: + return snapshot["fal"] + + return None + + +def _reset_for_tests() -> None: + """Clear the registry. **Test-only.**""" + with _lock: + _providers.clear() diff --git a/agent/prompt_builder.py b/agent/prompt_builder.py index 2a21043494..8e061f831b 100644 --- a/agent/prompt_builder.py +++ b/agent/prompt_builder.py @@ -350,7 +350,13 @@ PLATFORM_HINTS = { ), "cli": ( "You are a CLI AI Agent. Try not to use markdown but simple text " - "renderable inside a terminal." + "renderable inside a terminal. " + "File delivery: there is no attachment channel — the user reads your " + "response directly in their terminal. 
Do NOT emit MEDIA:/path tags " + "(those are only intercepted on messaging platforms like Telegram, " + "Discord, Slack, etc.; on the CLI they render as literal text). " + "When referring to a file you created or changed, just state its " + "absolute path in plain text; the user can open it from there." ), "sms": ( "You are communicating via SMS. Keep responses concise and use plain text " diff --git a/agent/transports/__init__.py b/agent/transports/__init__.py index 6cd3a277a1..5752113325 100644 --- a/agent/transports/__init__.py +++ b/agent/transports/__init__.py @@ -37,3 +37,15 @@ def _discover_transports() -> None: import agent.transports.anthropic # noqa: F401 except ImportError: pass + try: + import agent.transports.codex # noqa: F401 + except ImportError: + pass + try: + import agent.transports.chat_completions # noqa: F401 + except ImportError: + pass + try: + import agent.transports.bedrock # noqa: F401 + except ImportError: + pass diff --git a/agent/transports/bedrock.py b/agent/transports/bedrock.py new file mode 100644 index 0000000000..af549e7eae --- /dev/null +++ b/agent/transports/bedrock.py @@ -0,0 +1,154 @@ +"""AWS Bedrock Converse API transport. + +Delegates to the existing adapter functions in agent/bedrock_adapter.py. +Bedrock uses its own boto3 client (not the OpenAI SDK), so the transport +owns format conversion and normalization, while client construction and +boto3 calls stay on AIAgent. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class BedrockTransport(ProviderTransport): + """Transport for api_mode='bedrock_converse'.""" + + @property + def api_mode(self) -> str: + return "bedrock_converse" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI messages to Bedrock Converse format.""" + from agent.bedrock_adapter import convert_messages_to_converse + return convert_messages_to_converse(messages) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Bedrock Converse toolConfig.""" + from agent.bedrock_adapter import convert_tools_to_converse + return convert_tools_to_converse(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Bedrock converse() kwargs. + + Calls convert_messages and convert_tools internally. + + params: + max_tokens: int — output token limit (default 4096) + temperature: float | None + guardrail_config: dict | None — Bedrock guardrails + region: str — AWS region (default 'us-east-1') + """ + from agent.bedrock_adapter import build_converse_kwargs + + region = params.get("region", "us-east-1") + guardrail = params.get("guardrail_config") + + kwargs = build_converse_kwargs( + model=model, + messages=messages, + tools=tools, + max_tokens=params.get("max_tokens", 4096), + temperature=params.get("temperature"), + guardrail_config=guardrail, + ) + # Sentinel keys for dispatch — agent pops these before the boto3 call + kwargs["__bedrock_converse__"] = True + kwargs["__bedrock_region__"] = region + return kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Bedrock response to NormalizedResponse. + + Handles two shapes: + 1. Raw boto3 dict (from direct converse() calls) + 2. 
Already-normalized SimpleNamespace with .choices (from dispatch site) + """ + from agent.bedrock_adapter import normalize_converse_response + + # Normalize to OpenAI-compatible SimpleNamespace + if hasattr(response, "choices") and response.choices: + # Already normalized at dispatch site + ns = response + else: + # Raw boto3 dict + ns = normalize_converse_response(response) + + choice = ns.choices[0] + msg = choice.message + finish_reason = choice.finish_reason or "stop" + + tool_calls = None + if msg.tool_calls: + tool_calls = [ + ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + ) + for tc in msg.tool_calls + ] + + usage = None + if hasattr(ns, "usage") and ns.usage: + u = ns.usage + usage = Usage( + prompt_tokens=getattr(u, "prompt_tokens", 0) or 0, + completion_tokens=getattr(u, "completion_tokens", 0) or 0, + total_tokens=getattr(u, "total_tokens", 0) or 0, + ) + + reasoning = getattr(msg, "reasoning", None) or getattr(msg, "reasoning_content", None) + + return NormalizedResponse( + content=msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=reasoning, + usage=usage, + ) + + def validate_response(self, response: Any) -> bool: + """Check Bedrock response structure. + + After normalize_converse_response, the response has OpenAI-compatible + .choices — same check as chat_completions. + """ + if response is None: + return False + # Raw Bedrock dict response — check for 'output' key + if isinstance(response, dict): + return "output" in response + # Already-normalized SimpleNamespace + if hasattr(response, "choices"): + return bool(response.choices) + return False + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Bedrock stop reason to OpenAI finish_reason. + + The adapter already does this mapping inside normalize_converse_response, + so this is only used for direct access to raw responses. + """ + _MAP = { + "end_turn": "stop", + "tool_use": "tool_calls", + "max_tokens": "length", + "stop_sequence": "stop", + "guardrail_intervened": "content_filter", + "content_filtered": "content_filter", + } + return _MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("bedrock_converse", BedrockTransport) diff --git a/agent/transports/chat_completions.py b/agent/transports/chat_completions.py new file mode 100644 index 0000000000..900f59dcf4 --- /dev/null +++ b/agent/transports/chat_completions.py @@ -0,0 +1,387 @@ +"""OpenAI Chat Completions transport. + +Handles the default api_mode ('chat_completions') used by ~16 OpenAI-compatible +providers (OpenRouter, Nous, NVIDIA, Qwen, Ollama, DeepSeek, xAI, Kimi, etc.). + +Messages and tools are already in OpenAI format — convert_messages and +convert_tools are near-identity. The complexity lives in build_kwargs +which has provider-specific conditionals for max_tokens defaults, +reasoning configuration, temperature handling, and extra_body assembly. +""" + +import copy +from typing import Any, Dict, List, Optional + +from agent.prompt_builder import DEVELOPER_ROLE_MODELS +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class ChatCompletionsTransport(ProviderTransport): + """Transport for api_mode='chat_completions'. + + The default path for OpenAI-compatible providers. 
+ """ + + @property + def api_mode(self) -> str: + return "chat_completions" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> List[Dict[str, Any]]: + """Messages are already in OpenAI format — sanitize Codex leaks only. + + Strips Codex Responses API fields (``codex_reasoning_items`` on the + message, ``call_id``/``response_item_id`` on tool_calls) that strict + chat-completions providers reject with 400/422. + """ + needs_sanitize = False + for msg in messages: + if not isinstance(msg, dict): + continue + if "codex_reasoning_items" in msg: + needs_sanitize = True + break + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict) and ("call_id" in tc or "response_item_id" in tc): + needs_sanitize = True + break + if needs_sanitize: + break + + if not needs_sanitize: + return messages + + sanitized = copy.deepcopy(messages) + for msg in sanitized: + if not isinstance(msg, dict): + continue + msg.pop("codex_reasoning_items", None) + tool_calls = msg.get("tool_calls") + if isinstance(tool_calls, list): + for tc in tool_calls: + if isinstance(tc, dict): + tc.pop("call_id", None) + tc.pop("response_item_id", None) + return sanitized + + def convert_tools(self, tools: List[Dict[str, Any]]) -> List[Dict[str, Any]]: + """Tools are already in OpenAI format — identity.""" + return tools + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build chat.completions.create() kwargs. + + This is the most complex transport method — it handles ~16 providers + via params rather than subclasses. + + params: + timeout: float — API call timeout + max_tokens: int | None — user-configured max tokens + ephemeral_max_output_tokens: int | None — one-shot override (error recovery) + max_tokens_param_fn: callable — returns {max_tokens: N} or {max_completion_tokens: N} + reasoning_config: dict | None + request_overrides: dict | None + session_id: str | None + qwen_session_metadata: dict | None — {sessionId, promptId} precomputed + model_lower: str — lowercase model name for pattern matching + # Provider detection flags (all optional, default False) + is_openrouter: bool + is_nous: bool + is_qwen_portal: bool + is_github_models: bool + is_nvidia_nim: bool + is_kimi: bool + is_custom_provider: bool + ollama_num_ctx: int | None + # Provider routing + provider_preferences: dict | None + # Qwen-specific + qwen_prepare_fn: callable | None — runs AFTER codex sanitization + qwen_prepare_inplace_fn: callable | None — in-place variant for deepcopied lists + # Temperature + fixed_temperature: Any — from _fixed_temperature_for_model() + omit_temperature: bool + # Reasoning + supports_reasoning: bool + github_reasoning_extra: dict | None + # Claude on OpenRouter/Nous max output + anthropic_max_output: int | None + # Extra + extra_body_additions: dict | None — pre-built extra_body entries + """ + # Codex sanitization: drop reasoning_items / call_id / response_item_id + sanitized = self.convert_messages(messages) + + # Qwen portal prep AFTER codex sanitization. If sanitize already + # deepcopied, reuse that copy via the in-place variant to avoid a + # second deepcopy. 
+ is_qwen = params.get("is_qwen_portal", False) + if is_qwen: + qwen_prep = params.get("qwen_prepare_fn") + qwen_prep_inplace = params.get("qwen_prepare_inplace_fn") + if sanitized is messages: + if qwen_prep is not None: + sanitized = qwen_prep(sanitized) + else: + # Already deepcopied — transform in place + if qwen_prep_inplace is not None: + qwen_prep_inplace(sanitized) + elif qwen_prep is not None: + sanitized = qwen_prep(sanitized) + + # Developer role swap for GPT-5/Codex models + model_lower = params.get("model_lower", (model or "").lower()) + if ( + sanitized + and isinstance(sanitized[0], dict) + and sanitized[0].get("role") == "system" + and any(p in model_lower for p in DEVELOPER_ROLE_MODELS) + ): + sanitized = list(sanitized) + sanitized[0] = {**sanitized[0], "role": "developer"} + + api_kwargs: Dict[str, Any] = { + "model": model, + "messages": sanitized, + } + + timeout = params.get("timeout") + if timeout is not None: + api_kwargs["timeout"] = timeout + + # Temperature + fixed_temp = params.get("fixed_temperature") + omit_temp = params.get("omit_temperature", False) + if omit_temp: + api_kwargs.pop("temperature", None) + elif fixed_temp is not None: + api_kwargs["temperature"] = fixed_temp + + # Qwen metadata (caller precomputes {sessionId, promptId}) + qwen_meta = params.get("qwen_session_metadata") + if qwen_meta and is_qwen: + api_kwargs["metadata"] = qwen_meta + + # Tools + if tools: + api_kwargs["tools"] = tools + + # max_tokens resolution — priority: ephemeral > user > provider default + max_tokens_fn = params.get("max_tokens_param_fn") + ephemeral = params.get("ephemeral_max_output_tokens") + max_tokens = params.get("max_tokens") + anthropic_max_out = params.get("anthropic_max_output") + is_nvidia_nim = params.get("is_nvidia_nim", False) + is_kimi = params.get("is_kimi", False) + reasoning_config = params.get("reasoning_config") + + if ephemeral is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(ephemeral)) + elif max_tokens is not None and max_tokens_fn: + api_kwargs.update(max_tokens_fn(max_tokens)) + elif is_nvidia_nim and max_tokens_fn: + api_kwargs.update(max_tokens_fn(16384)) + elif is_qwen and max_tokens_fn: + api_kwargs.update(max_tokens_fn(65536)) + elif is_kimi and max_tokens_fn: + # Kimi/Moonshot: 32000 matches Kimi CLI's default + api_kwargs.update(max_tokens_fn(32000)) + elif anthropic_max_out is not None: + api_kwargs["max_tokens"] = anthropic_max_out + + # Kimi: top-level reasoning_effort (unless thinking disabled) + if is_kimi: + _kimi_thinking_off = bool( + reasoning_config + and isinstance(reasoning_config, dict) + and reasoning_config.get("enabled") is False + ) + if not _kimi_thinking_off: + _kimi_effort = "medium" + if reasoning_config and isinstance(reasoning_config, dict): + _e = (reasoning_config.get("effort") or "").strip().lower() + if _e in ("low", "medium", "high"): + _kimi_effort = _e + api_kwargs["reasoning_effort"] = _kimi_effort + + # extra_body assembly + extra_body: Dict[str, Any] = {} + + is_openrouter = params.get("is_openrouter", False) + is_nous = params.get("is_nous", False) + is_github_models = params.get("is_github_models", False) + + provider_prefs = params.get("provider_preferences") + if provider_prefs and is_openrouter: + extra_body["provider"] = provider_prefs + + # Kimi extra_body.thinking + if is_kimi: + _kimi_thinking_enabled = True + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + _kimi_thinking_enabled = False + extra_body["thinking"] = { + 
"type": "enabled" if _kimi_thinking_enabled else "disabled", + } + + # Reasoning + if params.get("supports_reasoning", False): + if is_github_models: + gh_reasoning = params.get("github_reasoning_extra") + if gh_reasoning is not None: + extra_body["reasoning"] = gh_reasoning + else: + if reasoning_config is not None: + rc = dict(reasoning_config) + if is_nous and rc.get("enabled") is False: + pass # omit for Nous when disabled + else: + extra_body["reasoning"] = rc + else: + extra_body["reasoning"] = {"enabled": True, "effort": "medium"} + + if is_nous: + extra_body["tags"] = ["product=hermes-agent"] + + # Ollama num_ctx + ollama_ctx = params.get("ollama_num_ctx") + if ollama_ctx: + options = extra_body.get("options", {}) + options["num_ctx"] = ollama_ctx + extra_body["options"] = options + + # Ollama/custom think=false + if params.get("is_custom_provider", False): + if reasoning_config and isinstance(reasoning_config, dict): + _effort = (reasoning_config.get("effort") or "").strip().lower() + _enabled = reasoning_config.get("enabled", True) + if _effort == "none" or _enabled is False: + extra_body["think"] = False + + if is_qwen: + extra_body["vl_high_resolution_images"] = True + + # Merge any pre-built extra_body additions + additions = params.get("extra_body_additions") + if additions: + extra_body.update(additions) + + if extra_body: + api_kwargs["extra_body"] = extra_body + + # Request overrides last (service_tier etc.) + overrides = params.get("request_overrides") + if overrides: + api_kwargs.update(overrides) + + return api_kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize OpenAI ChatCompletion to NormalizedResponse. + + For chat_completions, this is near-identity — the response is already + in OpenAI format. extra_content on tool_calls (Gemini thought_signature) + is preserved via ToolCall.provider_data. reasoning_details (OpenRouter + unified format) and reasoning_content (DeepSeek/Moonshot) are also + preserved for downstream replay. + """ + choice = response.choices[0] + msg = choice.message + finish_reason = choice.finish_reason or "stop" + + tool_calls = None + if msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + # Preserve provider-specific extras on the tool call. + # Gemini 3 thinking models attach extra_content with + # thought_signature — without replay on the next turn the API + # rejects the request with 400. + tc_provider_data: Dict[str, Any] = {} + extra = getattr(tc, "extra_content", None) + if extra is None and hasattr(tc, "model_extra"): + extra = (tc.model_extra or {}).get("extra_content") + if extra is not None: + if hasattr(extra, "model_dump"): + try: + extra = extra.model_dump() + except Exception: + pass + tc_provider_data["extra_content"] = extra + tool_calls.append(ToolCall( + id=tc.id, + name=tc.function.name, + arguments=tc.function.arguments, + provider_data=tc_provider_data or None, + )) + + usage = None + if hasattr(response, "usage") and response.usage: + u = response.usage + usage = Usage( + prompt_tokens=getattr(u, "prompt_tokens", 0) or 0, + completion_tokens=getattr(u, "completion_tokens", 0) or 0, + total_tokens=getattr(u, "total_tokens", 0) or 0, + ) + + # Preserve reasoning fields separately. DeepSeek/Moonshot use + # ``reasoning_content``; others use ``reasoning``. Downstream code + # (_extract_reasoning, thinking-prefill retry) reads both distinctly, + # so keep them apart in provider_data rather than merging. 
+ reasoning = getattr(msg, "reasoning", None) + reasoning_content = getattr(msg, "reasoning_content", None) + + provider_data: Dict[str, Any] = {} + if reasoning_content: + provider_data["reasoning_content"] = reasoning_content + rd = getattr(msg, "reasoning_details", None) + if rd: + provider_data["reasoning_details"] = rd + + return NormalizedResponse( + content=msg.content, + tool_calls=tool_calls, + finish_reason=finish_reason, + reasoning=reasoning, + usage=usage, + provider_data=provider_data or None, + ) + + def validate_response(self, response: Any) -> bool: + """Check that response has valid choices.""" + if response is None: + return False + if not hasattr(response, "choices") or response.choices is None: + return False + if not response.choices: + return False + return True + + def extract_cache_stats(self, response: Any) -> Optional[Dict[str, int]]: + """Extract OpenRouter/OpenAI cache stats from prompt_tokens_details.""" + usage = getattr(response, "usage", None) + if usage is None: + return None + details = getattr(usage, "prompt_tokens_details", None) + if details is None: + return None + cached = getattr(details, "cached_tokens", 0) or 0 + written = getattr(details, "cache_write_tokens", 0) or 0 + if cached or written: + return {"cached_tokens": cached, "creation_tokens": written} + return None + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("chat_completions", ChatCompletionsTransport) diff --git a/agent/transports/codex.py b/agent/transports/codex.py new file mode 100644 index 0000000000..ec48352193 --- /dev/null +++ b/agent/transports/codex.py @@ -0,0 +1,217 @@ +"""OpenAI Responses API (Codex) transport. + +Delegates to the existing adapter functions in agent/codex_responses_adapter.py. +This transport owns format conversion and normalization — NOT client lifecycle, +streaming, or the _run_codex_stream() call path. +""" + +from typing import Any, Dict, List, Optional + +from agent.transports.base import ProviderTransport +from agent.transports.types import NormalizedResponse, ToolCall, Usage + + +class ResponsesApiTransport(ProviderTransport): + """Transport for api_mode='codex_responses'. + + Wraps the functions extracted into codex_responses_adapter.py (PR 1). + """ + + @property + def api_mode(self) -> str: + return "codex_responses" + + def convert_messages(self, messages: List[Dict[str, Any]], **kwargs) -> Any: + """Convert OpenAI chat messages to Responses API input items.""" + from agent.codex_responses_adapter import _chat_messages_to_responses_input + return _chat_messages_to_responses_input(messages) + + def convert_tools(self, tools: List[Dict[str, Any]]) -> Any: + """Convert OpenAI tool schemas to Responses API function definitions.""" + from agent.codex_responses_adapter import _responses_tools + return _responses_tools(tools) + + def build_kwargs( + self, + model: str, + messages: List[Dict[str, Any]], + tools: Optional[List[Dict[str, Any]]] = None, + **params, + ) -> Dict[str, Any]: + """Build Responses API kwargs. + + Calls convert_messages and convert_tools internally. 
+ + params: + instructions: str — system prompt (extracted from messages[0] if not given) + reasoning_config: dict | None — {effort, enabled} + session_id: str | None — used for prompt_cache_key + xAI conv header + max_tokens: int | None — max_output_tokens + request_overrides: dict | None — extra kwargs merged in + provider: str | None — provider name for backend-specific logic + base_url: str | None — endpoint URL + base_url_hostname: str | None — hostname for backend detection + is_github_responses: bool — Copilot/GitHub models backend + is_codex_backend: bool — chatgpt.com/backend-api/codex + is_xai_responses: bool — xAI/Grok backend + github_reasoning_extra: dict | None — Copilot reasoning params + """ + from agent.codex_responses_adapter import ( + _chat_messages_to_responses_input, + _responses_tools, + ) + + from run_agent import DEFAULT_AGENT_IDENTITY + + instructions = params.get("instructions", "") + payload_messages = messages + if not instructions: + if messages and messages[0].get("role") == "system": + instructions = str(messages[0].get("content") or "").strip() + payload_messages = messages[1:] + if not instructions: + instructions = DEFAULT_AGENT_IDENTITY + + is_github_responses = params.get("is_github_responses", False) + is_codex_backend = params.get("is_codex_backend", False) + is_xai_responses = params.get("is_xai_responses", False) + + # Resolve reasoning effort + reasoning_effort = "medium" + reasoning_enabled = True + reasoning_config = params.get("reasoning_config") + if reasoning_config and isinstance(reasoning_config, dict): + if reasoning_config.get("enabled") is False: + reasoning_enabled = False + elif reasoning_config.get("effort"): + reasoning_effort = reasoning_config["effort"] + + _effort_clamp = {"minimal": "low"} + reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) + + kwargs = { + "model": model, + "instructions": instructions, + "input": _chat_messages_to_responses_input(payload_messages), + "tools": _responses_tools(tools), + "tool_choice": "auto", + "parallel_tool_calls": True, + "store": False, + } + + session_id = params.get("session_id") + if not is_github_responses and session_id: + kwargs["prompt_cache_key"] = session_id + + if reasoning_enabled and is_xai_responses: + kwargs["include"] = ["reasoning.encrypted_content"] + elif reasoning_enabled: + if is_github_responses: + github_reasoning = params.get("github_reasoning_extra") + if github_reasoning is not None: + kwargs["reasoning"] = github_reasoning + else: + kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} + kwargs["include"] = ["reasoning.encrypted_content"] + elif not is_github_responses and not is_xai_responses: + kwargs["include"] = [] + + request_overrides = params.get("request_overrides") + if request_overrides: + kwargs.update(request_overrides) + + max_tokens = params.get("max_tokens") + if max_tokens is not None and not is_codex_backend: + kwargs["max_output_tokens"] = max_tokens + + if is_xai_responses and session_id: + kwargs["extra_headers"] = {"x-grok-conv-id": session_id} + + return kwargs + + def normalize_response(self, response: Any, **kwargs) -> NormalizedResponse: + """Normalize Codex Responses API response to NormalizedResponse.""" + from agent.codex_responses_adapter import ( + _normalize_codex_response, + _extract_responses_message_text, + _extract_responses_reasoning_text, + ) + + # _normalize_codex_response returns (SimpleNamespace, finish_reason_str) + msg, finish_reason = _normalize_codex_response(response) + + tool_calls = 
None + if msg and msg.tool_calls: + tool_calls = [] + for tc in msg.tool_calls: + provider_data = {} + if hasattr(tc, "call_id") and tc.call_id: + provider_data["call_id"] = tc.call_id + if hasattr(tc, "response_item_id") and tc.response_item_id: + provider_data["response_item_id"] = tc.response_item_id + tool_calls.append(ToolCall( + id=tc.id if hasattr(tc, "id") else (tc.function.name if hasattr(tc, "function") else None), + name=tc.function.name if hasattr(tc, "function") else getattr(tc, "name", ""), + arguments=tc.function.arguments if hasattr(tc, "function") else getattr(tc, "arguments", "{}"), + provider_data=provider_data or None, + )) + + # Extract reasoning items for provider_data + provider_data = {} + if msg and hasattr(msg, "codex_reasoning_items") and msg.codex_reasoning_items: + provider_data["codex_reasoning_items"] = msg.codex_reasoning_items + if msg and hasattr(msg, "reasoning_details") and msg.reasoning_details: + provider_data["reasoning_details"] = msg.reasoning_details + + return NormalizedResponse( + content=msg.content if msg else None, + tool_calls=tool_calls, + finish_reason=finish_reason or "stop", + reasoning=msg.reasoning if msg and hasattr(msg, "reasoning") else None, + usage=None, # Codex usage is extracted separately in normalize_usage() + provider_data=provider_data or None, + ) + + def validate_response(self, response: Any) -> bool: + """Check Codex Responses API response has valid output structure. + + Returns True only if response.output is a non-empty list. + Does NOT check output_text fallback — the caller handles that + with diagnostic logging for stream backfill recovery. + """ + if response is None: + return False + output = getattr(response, "output", None) + if not isinstance(output, list) or not output: + return False + return True + + def preflight_kwargs(self, api_kwargs: Any, *, allow_stream: bool = False) -> dict: + """Validate and sanitize Codex API kwargs before the call. + + Normalizes input items, strips unsupported fields, validates structure. + """ + from agent.codex_responses_adapter import _preflight_codex_api_kwargs + return _preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream) + + def map_finish_reason(self, raw_reason: str) -> str: + """Map Codex response.status to OpenAI finish_reason. + + Codex uses response.status ('completed', 'incomplete') + + response.incomplete_details.reason for granular mapping. + This method handles the simple status string; the caller + should check incomplete_details separately for 'max_output_tokens'. + """ + _MAP = { + "completed": "stop", + "incomplete": "length", + "failed": "stop", + "cancelled": "stop", + } + return _MAP.get(raw_reason, "stop") + + +# Auto-register on import +from agent.transports import register_transport # noqa: E402 + +register_transport("codex_responses", ResponsesApiTransport) diff --git a/cli-config.yaml.example b/cli-config.yaml.example index a4a5ffda76..e8e3d30af6 100644 --- a/cli-config.yaml.example +++ b/cli-config.yaml.example @@ -770,10 +770,12 @@ code_execution: # Subagent Delegation # ============================================================================= # The delegate_task tool spawns child agents with isolated context. -# Supports single tasks and batch mode (up to 3 parallel). +# Supports single tasks and batch mode (default 3 parallel, configurable). 
 delegation:
   max_iterations: 50  # Max tool-calling turns per child (default: 50)
-  default_toolsets: ["terminal", "file", "web"]  # Default toolsets for subagents
+  # max_concurrent_children: 3  # Max parallel child agents (default: 3)
+  # max_spawn_depth: 1  # Tree depth cap (1-3, default: 1 = flat). Raise to 2 or 3 to allow orchestrator children to spawn their own workers.
+  # orchestrator_enabled: true  # Kill switch for role="orchestrator" children (default: true).
   # model: "google/gemini-3-flash-preview"  # Override model for subagents (empty = inherit parent)
   # provider: "openrouter"  # Override provider for subagents (empty = inherit parent)
   #   # Resolves full credentials (base_url, api_key) automatically.
diff --git a/cli.py b/cli.py
index 3851aea295..19bae13fdb 100644
--- a/cli.py
+++ b/cli.py
@@ -26,6 +26,7 @@ import tempfile
 import time
 import uuid
 import textwrap
+from urllib.parse import unquote, urlparse
 from contextlib import contextmanager
 from pathlib import Path
 from datetime import datetime
@@ -398,7 +399,6 @@ def load_cli_config() -> Dict[str, Any]:
         },
         "delegation": {
             "max_iterations": 45,  # Max tool-calling turns per child agent
-            "default_toolsets": ["terminal", "file", "web"],  # Default toolsets for subagents
             "model": "",  # Subagent model override (empty = inherit parent model)
             "provider": "",  # Subagent provider override (empty = inherit parent provider)
             "base_url": "",  # Direct OpenAI-compatible endpoint for subagents
@@ -1182,11 +1182,11 @@ def _strip_markdown_syntax(text: str) -> str:
     plain = re.sub(r"!\[([^\]]*)\]\([^\)]*\)", r"\1", plain)
     plain = re.sub(r"\[([^\]]+)\]\([^\)]*\)", r"\1", plain)
     plain = re.sub(r"\*\*\*([^*]+)\*\*\*", r"\1", plain)
-    plain = re.sub(r"___([^_]+)___", r"\1", plain)
+    plain = re.sub(r"(?<!\w)___([^_]+)___(?!\w)", r"\1", plain)
@@ ... @@ def _resolve_attachment_path(token: str) -> Path | None:
     if (token.startswith('"') and token.endswith('"')) or (token.startswith("'") and token.endswith("'")):
         token = token[1:-1].strip()
+    token = token.replace('\\ ', ' ')
     if not token:
         return None
-    expanded = os.path.expandvars(os.path.expanduser(token))
+    expanded = token
+    if token.startswith("file://"):
+        try:
+            parsed = urlparse(token)
+            if parsed.scheme == "file":
+                expanded = unquote(parsed.path or "")
+                if parsed.netloc and os.name == "nt":
+                    expanded = f"//{parsed.netloc}{expanded}"
+        except Exception:
+            expanded = token
+    expanded = os.path.expandvars(os.path.expanduser(expanded))
     if os.name != "nt":
         normalized = expanded.replace("\\", "/")
         if len(normalized) >= 3 and normalized[1] == ":" and normalized[2] == "/" and normalized[0].isalpha():
@@ -1389,6 +1400,7 @@ def _detect_file_drop(user_input: str) -> "dict | None":
         or stripped.startswith("~")
         or stripped.startswith("./")
         or stripped.startswith("../")
+        or stripped.startswith("file://")
         or (len(stripped) >= 3 and stripped[1] == ":" and stripped[2] in ("\\", "/") and stripped[0].isalpha())
         or stripped.startswith('"/')
         or stripped.startswith('"~')
@@ -1399,8 +1411,25 @@
     if not starts_like_path:
         return None
 
+    direct_path = _resolve_attachment_path(stripped)
+    if direct_path is not None:
+        return {
+            "path": direct_path,
+            "is_image": direct_path.suffix.lower() in _IMAGE_EXTENSIONS,
+            "remainder": "",
+        }
+
     first_token, remainder = _split_path_input(stripped)
     drop_path = _resolve_attachment_path(first_token)
+    if drop_path is None and " " in stripped and stripped[0] not in {"'", '"'}:
+        space_positions = [idx for idx, ch in enumerate(stripped) if ch == " "]
+        for pos in reversed(space_positions):
+            candidate = stripped[:pos].rstrip()
+ resolved = _resolve_attachment_path(candidate) + if resolved is not None: + drop_path = resolved + remainder = stripped[pos + 1 :].strip() + break if drop_path is None: return None @@ -8369,6 +8398,17 @@ class HermesCLI: def run_agent(): nonlocal result + # Set callbacks inside the agent thread so thread-local storage + # in terminal_tool is populated for this thread. The main thread + # registration (run() line ~9046) is invisible here because + # _callback_tls is threading.local(). Matches the pattern used + # by acp_adapter/server.py for ACP sessions. + set_sudo_password_callback(self._sudo_password_callback) + set_approval_callback(self._approval_callback) + try: + set_secret_capture_callback(self._secret_capture_callback) + except Exception: + pass agent_message = _voice_prefix + message if _voice_prefix else message # Prepend pending model switch note so the model knows about the switch _msn = getattr(self, '_pending_model_switch_note', None) @@ -8394,6 +8434,15 @@ class HermesCLI: "failed": True, "error": _summary, } + finally: + # Clear thread-local callbacks so a reused thread doesn't + # hold stale references to a disposed CLI instance. + try: + set_sudo_password_callback(None) + set_approval_callback(None) + set_secret_capture_callback(None) + except Exception: + pass # Start agent in background thread (daemon so it cannot keep the # process alive when the user closes the terminal tab — SIGHUP diff --git a/gateway/platforms/base.py b/gateway/platforms/base.py index 8caa07e1cf..da619a6afe 100644 --- a/gateway/platforms/base.py +++ b/gateway/platforms/base.py @@ -1329,7 +1329,7 @@ class BasePlatformAdapter(ABC): # Extract MEDIA: tags, allowing optional whitespace after the colon # and quoted/backticked paths for LLM-formatted outputs. media_pattern = re.compile( - r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' + r'''[`"']?MEDIA:\s*(?P<path>`[^`\n]+`|"[^"\n]+"|'[^'\n]+'|(?:~/|/)\S+(?:[^\S\n]+\S+)*?\.(?:png|jpe?g|gif|webp|mp4|mov|avi|mkv|webm|ogg|opus|mp3|wav|m4a|pdf)(?=[\s`"',;:)\]}]|$)|\S+)[`"']?''' ) for match in media_pattern.finditer(content): path = match.group("path").strip() diff --git a/gateway/run.py b/gateway/run.py index eaabdcd7e6..c088dbe902 100644 --- a/gateway/run.py +++ b/gateway/run.py @@ -3889,14 +3889,14 @@ class GatewayRunner: message_text = f"{context_note}\n\n{message_text}" if getattr(event, "reply_to_text", None) and event.reply_to_message_id: + # Always inject the reply-to pointer — even when the quoted text + # already appears in history. The prefix isn't deduplication, it's + # disambiguation: it tells the agent *which* prior message the user + # is referencing. History can contain the same or similar text + # multiple times, and without an explicit pointer the agent has to + # guess (or answer for both subjects). Token overhead is minimal.
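+ # Hypothetical illustration: replying "why did this fail?" to an earlier + # "Deploy finished with errors" message makes the outbound text: + # [Replying to: "Deploy finished with errors"] + # + # why did this fail?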
reply_snippet = str(event.reply_to_text)[:500] - found_in_history = any( - reply_snippet[:200] in (msg.get("content") or "") - for msg in history - if msg.get("role") in ("assistant", "user", "tool") - ) - if not found_in_history: - message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' + message_text = f'[Replying to: "{reply_snippet}"]\n\n{message_text}' if "@" in message_text: try: diff --git a/hermes_cli/auth.py b/hermes_cli/auth.py index 6de56af5a3..8eec141201 100644 --- a/hermes_cli/auth.py +++ b/hermes_cli/auth.py @@ -168,8 +168,11 @@ PROVIDER_REGISTRY: Dict[str, ProviderConfig] = { id="kimi-coding", name="Kimi / Moonshot", auth_type="api_key", + # Legacy platform.moonshot.ai keys use this endpoint (OpenAI-compat). + # sk-kimi- (Kimi Code) keys are auto-redirected to api.kimi.com/coding + # by _resolve_kimi_base_url() below. inference_base_url="https://api.moonshot.ai/v1", - api_key_env_vars=("KIMI_API_KEY",), + api_key_env_vars=("KIMI_API_KEY", "KIMI_CODING_API_KEY"), base_url_env_var="KIMI_BASE_URL", ), "kimi-coding-cn": ProviderConfig( @@ -340,10 +343,16 @@ def get_anthropic_key() -> str: # ============================================================================= # Kimi Code (kimi.com/code) issues keys prefixed "sk-kimi-" that only work -# on api.kimi.com/coding/v1. Legacy keys from platform.moonshot.ai work on -# api.moonshot.ai/v1 (the default). Auto-detect when user hasn't set +# on api.kimi.com/coding. Legacy keys from platform.moonshot.ai work on +# api.moonshot.ai/v1 (the old default). Auto-detect when user hasn't set # KIMI_BASE_URL explicitly. -KIMI_CODE_BASE_URL = "https://api.kimi.com/coding/v1" +# +# Note: the base URL intentionally has NO /v1 suffix. The /coding endpoint +# speaks the Anthropic Messages protocol, and the anthropic SDK appends +# "/v1/messages" internally — so "/coding" + SDK suffix → "/coding/v1/messages" +# (the correct target). Using "/coding/v1" here would produce +# "/coding/v1/v1/messages" (a 404). +KIMI_CODE_BASE_URL = "https://api.kimi.com/coding" def _resolve_kimi_base_url(api_key: str, default_url: str, env_override: str) -> str: @@ -3379,7 +3388,7 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: ) from hermes_cli.models import ( - _PROVIDER_MODELS, get_pricing_for_provider, filter_nous_free_models, + _PROVIDER_MODELS, get_pricing_for_provider, check_nous_free_tier, partition_nous_models_by_tier, ) model_ids = _PROVIDER_MODELS.get("nous", []) @@ -3388,7 +3397,6 @@ def _login_nous(args, pconfig: ProviderConfig) -> None: unavailable_models: list = [] if model_ids: pricing = get_pricing_for_provider("nous") - model_ids = filter_nous_free_models(model_ids, pricing) free_tier = check_nous_free_tier() if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( diff --git a/hermes_cli/commands.py b/hermes_cli/commands.py index 797acab5e9..8b43a351fb 100644 --- a/hermes_cli/commands.py +++ b/hermes_cli/commands.py @@ -924,12 +924,22 @@ class SlashCommandCompleter(Completer): display_meta=meta, ) - # If the user typed @file: or @folder:, delegate to path completions + # If the user typed @file: / @folder: (or just @file / @folder with + # no colon yet), delegate to path completions. Accepting the bare + # form lets the picker surface directories as soon as the user has + # typed `@folder`, without requiring them to first accept the static + # `@folder:` hint and re-trigger completion. for prefix in ("@file:", "@folder:"): - if word.startswith(prefix): - path_part = word[len(prefix):] or "." 
+ bare = prefix[:-1] + + if word == bare or word.startswith(prefix): + want_dir = prefix == "@folder:" + path_part = '' if word == bare else word[len(prefix):] expanded = os.path.expanduser(path_part) - if expanded.endswith("/"): + + if not expanded or expanded == ".": + search_dir, match_prefix = ".", "" + elif expanded.endswith("/"): search_dir, match_prefix = expanded, "" else: search_dir = os.path.dirname(expanded) or "." @@ -945,15 +955,21 @@ for entry in sorted(entries): if match_prefix and not entry.lower().startswith(prefix_lower): continue - if count >= limit: - break full_path = os.path.join(search_dir, entry) is_dir = os.path.isdir(full_path) + # `@folder:` must only surface directories; `@file:` only + # regular files. Without this filter `@folder:` listed + # every .env / .gitignore in the cwd, defeating the + # explicit prefix and confusing users expecting a + # directory picker. + if want_dir != is_dir: + continue + if count >= limit: + break display_path = os.path.relpath(full_path) suffix = "/" if is_dir else "" - kind = "folder" if is_dir else "file" meta = "dir" if is_dir else _file_size_label(full_path) - completion = f"@{kind}:{display_path}{suffix}" + completion = f"{prefix}{display_path}{suffix}" yield Completion( completion, start_position=-len(word), diff --git a/hermes_cli/config.py b/hermes_cli/config.py index 74c27bca94..366b672b56 100644 --- a/hermes_cli/config.py +++ b/hermes_cli/config.py @@ -964,6 +964,10 @@ DEFAULT_CONFIG: _DefaultConfig = { }, # Text-to-speech configuration + # Each provider supports an optional `max_text_length:` override for the + # per-request input-character cap. Omit it to use the provider's documented + # limit (OpenAI 4096, xAI 15000, MiniMax 10000, ElevenLabs 5k-40k model-aware, + # Gemini 5000, Edge 5000, Mistral 4000, NeuTTS/KittenTTS 2000). "tts": { "provider": "edge", # "edge" (free) | "elevenlabs" (premium) | "openai" | "xai" | "minimax" | "mistral" | "neutts" (local) "edge": { @@ -1063,6 +1067,12 @@ DEFAULT_CONFIG: _DefaultConfig = { # independent of the parent's max_iterations) "reasoning_effort": "", # reasoning effort for subagents: "xhigh", "high", "medium", # "low", "minimal", "none" (empty = inherit parent's level) + "max_concurrent_children": 3, # max parallel children per batch; floor of 1 enforced, no ceiling + # Orchestrator role controls (see tools/delegate_tool.py:_get_max_spawn_depth + # and _get_orchestrator_enabled). max_spawn_depth is clamped to [1, 3] with a + # warning log if out of range. + "max_spawn_depth": 1, # depth cap (1 = flat [default], 2 = orchestrator→leaf, 3 = three-level) + "orchestrator_enabled": True, # kill switch for role="orchestrator" }, # Ephemeral prefill messages file — JSON list of {role, content} dicts diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index e16f0bf5e6..2fc50321f6 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -943,18 +943,22 @@ def run_doctor(args): try: import httpx _base = os.getenv(_base_env, "") if _base_env else "" - # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com + # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com/coding/v1 + # (OpenAI-compat surface, which exposes /models for health check). if not _base and _key.startswith("sk-kimi-"): _base = "https://api.kimi.com/coding/v1" - # Anthropic-compat endpoints (/anthropic) don't support /models. - # Rewrite to the OpenAI-compat /v1 surface for health checks.
+ # Anthropic-compat endpoints (/anthropic, api.kimi.com/coding + # with no /v1) don't support /models. Rewrite to the OpenAI-compat + # /v1 surface for health checks. if _base and _base.rstrip("/").endswith("/anthropic"): from agent.auxiliary_client import _to_openai_base_url _base = _to_openai_base_url(_base) + if base_url_host_matches(_base, "api.kimi.com") and _base.rstrip("/").endswith("/coding"): + _base = _base.rstrip("/") + "/v1" _url = (_base.rstrip("/") + "/models") if _base else _default_url _headers = {"Authorization": f"Bearer {_key}"} if base_url_host_matches(_base, "api.kimi.com"): - _headers["User-Agent"] = "KimiCLI/1.30.0" + _headers["User-Agent"] = "claude-code/0.1.0" _resp = httpx.get( _url, headers=_headers, diff --git a/hermes_cli/main.py b/hermes_cli/main.py index e4c52cb8aa..a5cb11392a 100644 --- a/hermes_cli/main.py +++ b/hermes_cli/main.py @@ -2165,7 +2165,6 @@ def _model_flow_nous(config, current_model="", args=None): from hermes_cli.models import ( _PROVIDER_MODELS, get_pricing_for_provider, - filter_nous_free_models, check_nous_free_tier, partition_nous_models_by_tier, ) @@ -2208,10 +2207,8 @@ def _model_flow_nous(config, current_model="", args=None): # Check if user is on free tier free_tier = check_nous_free_tier() - # For both tiers: apply the allowlist filter first (removes non-allowlisted - # free models and allowlist models that aren't actually free). - # Then for free users: partition remaining models into selectable/unavailable. - model_ids = filter_nous_free_models(model_ids, pricing) + # For free users: partition models into selectable/unavailable based on + # whether they are free per the Portal-reported pricing. unavailable_models: list[str] = [] if free_tier: model_ids, unavailable_models = partition_nous_models_by_tier( diff --git a/hermes_cli/model_switch.py b/hermes_cli/model_switch.py index 22721f9a42..5b26f5b8b5 100644 --- a/hermes_cli/model_switch.py +++ b/hermes_cli/model_switch.py @@ -678,6 +678,7 @@ def switch_model( _da = DIRECT_ALIASES.get(resolved_alias) if _da is not None and _da.base_url: base_url = _da.base_url + api_mode = "" # clear so determine_api_mode re-detects from URL if not api_key: api_key = "no-key-required" diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 33614d4263..186119b24d 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -53,6 +53,7 @@ OPENROUTER_MODELS: list[tuple[str, str]] = [ ("stepfun/step-3.5-flash", ""), ("minimax/minimax-m2.7", ""), ("minimax/minimax-m2.5", ""), + ("minimax/minimax-m2.5:free", "free"), ("z-ai/glm-5.1", ""), ("z-ai/glm-5v-turbo", ""), ("z-ai/glm-5-turbo", ""), @@ -125,17 +126,15 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "stepfun/step-3.5-flash", "minimax/minimax-m2.7", "minimax/minimax-m2.5", + "minimax/minimax-m2.5:free", "z-ai/glm-5.1", "z-ai/glm-5v-turbo", "z-ai/glm-5-turbo", "x-ai/grok-4.20-beta", "nvidia/nemotron-3-super-120b-a12b", - "nvidia/nemotron-3-super-120b-a12b:free", - "arcee-ai/trinity-large-preview:free", "arcee-ai/trinity-large-thinking", "openai/gpt-5.4-pro", "openai/gpt-5.4-nano", - "openrouter/elephant-alpha", ], "openai-codex": _codex_curated_models(), "copilot-acp": [ @@ -362,17 +361,11 @@ _PROVIDER_MODELS: dict[str, list[str]] = { _PROVIDER_MODELS["ai-gateway"] = [mid for mid, _ in VERCEL_AI_GATEWAY_MODELS] # --------------------------------------------------------------------------- -# Nous Portal free-model filtering +# Nous Portal free-model helper # --------------------------------------------------------------------------- -# Models 
that are ALLOWED to appear when priced as free on Nous Portal. -# Any other free model is hidden — prevents promotional/temporary free models -# from cluttering the selection when users are paying subscribers. -# Models in this list are ALSO filtered out if they are NOT free (i.e. they -# should only appear in the menu when they are genuinely free). -_NOUS_ALLOWED_FREE_MODELS: frozenset[str] = frozenset({ - "xiaomi/mimo-v2-pro", - "xiaomi/mimo-v2-omni", -}) +# The Nous Portal models endpoint is the source of truth for which models +# are currently offered (free or paid). We trust whatever it returns and +# surface it to users as-is — no local allowlist filtering. def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: @@ -386,35 +379,6 @@ def _is_model_free(model_id: str, pricing: dict[str, dict[str, str]]) -> bool: return False -def filter_nous_free_models( - model_ids: list[str], - pricing: dict[str, dict[str, str]], -) -> list[str]: - """Filter the Nous Portal model list according to free-model policy. - - Rules: - • Paid models that are NOT in the allowlist → keep (normal case). - • Free models that are NOT in the allowlist → drop. - • Allowlist models that ARE free → keep. - • Allowlist models that are NOT free → drop. - """ - if not pricing: - return model_ids # no pricing data — can't filter, show everything - - result: list[str] = [] - for mid in model_ids: - free = _is_model_free(mid, pricing) - if mid in _NOUS_ALLOWED_FREE_MODELS: - # Allowlist model: only show when it's actually free - if free: - result.append(mid) - else: - # Regular model: keep only when it's NOT free - if not free: - result.append(mid) - return result - - # --------------------------------------------------------------------------- # Nous Portal account tier detection # --------------------------------------------------------------------------- @@ -478,8 +442,7 @@ def partition_nous_models_by_tier( ) -> tuple[list[str], list[str]]: """Split Nous models into (selectable, unavailable) based on user tier. - For paid-tier users: all models are selectable, none unavailable - (free-model filtering is handled separately by ``filter_nous_free_models``). + For paid-tier users: all models are selectable, none unavailable. For free-tier users: only free models are selectable; paid models are returned as unavailable (shown grayed out in the menu). @@ -549,6 +512,157 @@ def check_nous_free_tier() -> bool: return False # default to paid on error — don't block users +# --------------------------------------------------------------------------- +# Nous Portal recommended models +# +# The Portal publishes a curated list of suggested models (separated into +# paid and free tiers) plus dedicated recommendations for compaction (text +# summarisation / auxiliary) and vision tasks. We fetch it once per process +# with a TTL cache so callers can ask "what's the best aux model right now?" +# without hitting the network on every lookup. +# +# Shape of the response (fields we care about): +# { +# "paidRecommendedModels": [ {modelName, ...}, ... ], +# "freeRecommendedModels": [ {modelName, ...}, ... 
], +# "paidRecommendedCompactionModel": {modelName, ...} | null, +# "paidRecommendedVisionModel": {modelName, ...} | null, +# "freeRecommendedCompactionModel": {modelName, ...} | null, +# "freeRecommendedVisionModel": {modelName, ...} | null, +# } +# --------------------------------------------------------------------------- + +NOUS_RECOMMENDED_MODELS_PATH = "/api/nous/recommended-models" +_NOUS_RECOMMENDED_CACHE_TTL: int = 600 # seconds (10 minutes) +# (result_dict, timestamp) keyed by portal_base_url so staging vs prod don't collide. +_nous_recommended_cache: dict[str, tuple[dict[str, Any], float]] = {} + + +def fetch_nous_recommended_models( + portal_base_url: str = "", + timeout: float = 5.0, + *, + force_refresh: bool = False, +) -> dict[str, Any]: + """Fetch the Nous Portal's curated recommended-models payload. + + Hits ``/api/nous/recommended-models``. The endpoint is public — + no auth is required. Results are cached per portal URL for + ``_NOUS_RECOMMENDED_CACHE_TTL`` seconds; pass ``force_refresh=True`` to + bypass the cache. + + Returns the parsed JSON dict on success, or ``{}`` on any failure + (network, parse, non-2xx). Callers must treat missing/null fields as + "no recommendation" and fall back to their own default. + """ + base = (portal_base_url or "https://portal.nousresearch.com").rstrip("/") + now = time.monotonic() + cached = _nous_recommended_cache.get(base) + if not force_refresh and cached is not None: + payload, cached_at = cached + if now - cached_at < _NOUS_RECOMMENDED_CACHE_TTL: + return payload + + url = f"{base}{NOUS_RECOMMENDED_MODELS_PATH}" + try: + req = urllib.request.Request( + url, + headers={"Accept": "application/json"}, + ) + with urllib.request.urlopen(req, timeout=timeout) as resp: + data = json.loads(resp.read().decode()) + if not isinstance(data, dict): + data = {} + except Exception: + data = {} + + _nous_recommended_cache[base] = (data, now) + return data + + +def _resolve_nous_portal_url() -> str: + """Best-effort lookup of the Portal base URL the user is authed against.""" + try: + from hermes_cli.auth import ( + DEFAULT_NOUS_PORTAL_URL, + get_provider_auth_state, + ) + state = get_provider_auth_state("nous") or {} + portal = str(state.get("portal_base_url") or "").strip() + if portal: + return portal.rstrip("/") + return str(DEFAULT_NOUS_PORTAL_URL).rstrip("/") + except Exception: + return "https://portal.nousresearch.com" + + +def _extract_model_name(entry: Any) -> Optional[str]: + """Pull the ``modelName`` field from a recommended-model entry, else None.""" + if not isinstance(entry, dict): + return None + model_name = entry.get("modelName") + if isinstance(model_name, str) and model_name.strip(): + return model_name.strip() + return None + + +def get_nous_recommended_aux_model( + *, + vision: bool = False, + free_tier: Optional[bool] = None, + portal_base_url: str = "", + force_refresh: bool = False, +) -> Optional[str]: + """Return the Portal's recommended model name for an auxiliary task. + + Picks the best field from the Portal's recommended-models payload: + + * ``vision=True`` → ``paidRecommendedVisionModel`` (paid tier) or + ``freeRecommendedVisionModel`` (free tier) + * ``vision=False`` → ``paidRecommendedCompactionModel`` or + ``freeRecommendedCompactionModel`` + + When ``free_tier`` is ``None`` (default) the user's tier is auto-detected + via :func:`check_nous_free_tier`. Pass an explicit bool to bypass the + detection — useful for tests or when the caller already knows the tier. 
+ + For paid-tier users we prefer the paid recommendation but gracefully fall + back to the free recommendation if the Portal returned ``null`` for the + paid field (common during the staged rollout of new paid models). + + Returns ``None`` when every candidate is missing, null, or the fetch + fails — callers should fall back to their own default (currently + ``google/gemini-3-flash-preview``). + """ + base = portal_base_url or _resolve_nous_portal_url() + payload = fetch_nous_recommended_models(base, force_refresh=force_refresh) + if not payload: + return None + + if free_tier is None: + try: + free_tier = check_nous_free_tier() + except Exception: + # On any detection error, assume paid — paid users see both fields + # anyway so this is a safe default that maximises model quality. + free_tier = False + + if vision: + paid_key, free_key = "paidRecommendedVisionModel", "freeRecommendedVisionModel" + else: + paid_key, free_key = "paidRecommendedCompactionModel", "freeRecommendedCompactionModel" + + # Preference order: + # free tier → free only + # paid tier → paid, then free (if paid field is null) + candidates = [free_key] if free_tier else [paid_key, free_key] + for key in candidates: + name = _extract_model_name(payload.get(key)) + if name: + return name + return None + + # --------------------------------------------------------------------------- # Canonical provider list — single source of truth for provider identity. # Every code path that lists, displays, or iterates providers derives from diff --git a/hermes_cli/plugins.py b/hermes_cli/plugins.py index a593782e61..11f18f0716 100644 --- a/hermes_cli/plugins.py +++ b/hermes_cli/plugins.py @@ -133,6 +133,9 @@ def _get_enabled_plugins() -> Optional[set]: # Data classes # --------------------------------------------------------------------------- +_VALID_PLUGIN_KINDS: Set[str] = {"standalone", "backend", "exclusive"} + + @dataclass class PluginManifest: """Parsed representation of a plugin.yaml manifest.""" @@ -146,6 +149,23 @@ class PluginManifest: provides_hooks: List[str] = field(default_factory=list) source: str = "" # "user", "project", or "entrypoint" path: Optional[str] = None + # Plugin kind — see plugins.py module docstring for semantics. + # ``standalone`` (default): hooks/tools of its own; opt-in via + # ``plugins.enabled``. + # ``backend``: pluggable backend for an existing core tool (e.g. + # image_gen). Built-in (bundled) backends auto-load; + # user-installed still gated by ``plugins.enabled``. + # ``exclusive``: category with exactly one active provider (memory). + # Selection via ``<category>.provider`` config key; the + # category's own discovery system handles loading and the + # general scanner skips these. + kind: str = "standalone" + # Registry key — path-derived, used by ``plugins.enabled``/``disabled`` + # lookups and by ``hermes plugins list``. For a flat plugin at + # ``plugins/disk-cleanup/`` the key is ``disk-cleanup``; for a nested + # category plugin at ``plugins/image_gen/openai/`` the key is + # ``image_gen/openai``. When empty, falls back to ``name``. + key: str = "" @dataclass @@ -366,6 +386,33 @@ class PluginContext: self.manifest.name, engine.name, ) + # -- image gen provider registration ------------------------------------ + + def register_image_gen_provider(self, provider) -> None: + """Register an image generation backend. + + ``provider`` must be an instance of + :class:`agent.image_gen_provider.ImageGenProvider`.
The + ``provider.name`` attribute is what ``image_gen.provider`` in + ``config.yaml`` matches against when routing ``image_generate`` + tool calls. + """ + from agent.image_gen_provider import ImageGenProvider + from agent.image_gen_registry import register_provider + + if not isinstance(provider, ImageGenProvider): + logger.warning( + "Plugin '%s' tried to register an image_gen provider that does " + "not inherit from ImageGenProvider. Ignoring.", + self.manifest.name, + ) + return + register_provider(provider) + logger.info( + "Plugin '%s' registered image_gen provider: %s", + self.manifest.name, provider.name, + ) + # -- hook registration -------------------------------------------------- def register_hook(self, hook_name: str, callback: Callable) -> None: @@ -465,11 +512,16 @@ class PluginManager: manifests: List[PluginManifest] = [] # 1. Bundled plugins (<repo>/plugins/<name>/) - # Repo-shipped generic plugins live next to hermes_cli/. Memory and - # context_engine subdirs are handled by their own discovery paths, so - # skip those names here. Bundled plugins are discovered (so they - # show up in `hermes plugins`) but only loaded when added to - # `plugins.enabled` in config.yaml — opt-in like any other plugin. + # + # Repo-shipped plugins live next to hermes_cli/. Two layouts are + # supported (see ``_scan_directory`` for details): + # + # - flat: ``plugins/disk-cleanup/plugin.yaml`` (standalone) + # - category: ``plugins/image_gen/openai/plugin.yaml`` (backend) + # + # ``memory/`` and ``context_engine/`` are skipped at the top level — + # they have their own discovery systems. Porting those to the + # category-namespace ``kind: exclusive`` model is a future PR. repo_plugins = Path(__file__).resolve().parent.parent / "plugins" manifests.extend( self._scan_directory( @@ -492,36 +544,69 @@ class PluginManager: manifests.extend(self._scan_entry_points()) # Load each manifest (skip user-disabled plugins). - # Later sources override earlier ones on name collision — user plugins - # take precedence over bundled, project plugins take precedence over - # user. Dedup here so we only load the final winner. + # Later sources override earlier ones on key collision — user + # plugins take precedence over bundled, project plugins take + # precedence over user. Dedup here so we only load the final + # winner. Keys are path-derived (``image_gen/openai``, + # ``disk-cleanup``) so ``tts/openai`` and ``image_gen/openai`` + # don't collide even when both manifests say ``name: openai``. disabled = _get_disabled_plugins() enabled = _get_enabled_plugins() # None = opt-in default (nothing enabled) winners: Dict[str, PluginManifest] = {} for manifest in manifests: - winners[manifest.name] = manifest + winners[manifest.key or manifest.name] = manifest for manifest in winners.values(): - # Explicit disable always wins. - if manifest.name in disabled: + lookup_key = manifest.key or manifest.name + + # Explicit disable always wins (matches on key or on legacy + # bare name for back-compat with existing user configs). + if lookup_key in disabled or manifest.name in disabled: loaded = LoadedPlugin(manifest=manifest, enabled=False) loaded.error = "disabled via config" - self._plugins[manifest.name] = loaded - logger.debug("Skipping disabled plugin '%s'", manifest.name) + self._plugins[lookup_key] = loaded + logger.debug("Skipping disabled plugin '%s'", lookup_key) continue - # Opt-in gate: plugins must be in the enabled allow-list.
- # If the allow-list is missing (None), treat as "nothing enabled" - # — users have to explicitly enable plugins to load them. - # Memory and context_engine providers are excluded from this gate - # since they have their own single-select config (memory.provider - # / context.engine), not the enabled list. - if enabled is None or manifest.name not in enabled: + + # Exclusive plugins (memory providers) have their own + # discovery/activation path. The general loader records the + # manifest for introspection but does not load the module. + if manifest.kind == "exclusive": loaded = LoadedPlugin(manifest=manifest, enabled=False) - loaded.error = "not enabled in config (run `hermes plugins enable {}` to activate)".format( - manifest.name + loaded.error = ( + "exclusive plugin — activate via <category>.provider config" ) - self._plugins[manifest.name] = loaded + self._plugins[lookup_key] = loaded logger.debug( - "Skipping '%s' (not in plugins.enabled)", manifest.name + "Skipping '%s' (exclusive, handled by category discovery)", + lookup_key, + ) + continue + + # Built-in backends auto-load — they ship with hermes and must + # just work. Selection among them (e.g. which image_gen backend + # services tool calls) is driven by ``<category>.provider`` config, + # enforced by the tool wrapper. + if manifest.kind == "backend" and manifest.source == "bundled": + self._load_plugin(manifest) + continue + + # Everything else (standalone, user-installed backends, + # entry-point plugins) is opt-in via plugins.enabled. + # Accept both the path-derived key and the legacy bare name + # so existing configs keep working. + is_enabled = ( + enabled is not None + and (lookup_key in enabled or manifest.name in enabled) + ) + if not is_enabled: + loaded = LoadedPlugin(manifest=manifest, enabled=False) + loaded.error = ( + "not enabled in config (run `hermes plugins enable {}` to activate)" + .format(lookup_key) + ) + self._plugins[lookup_key] = loaded + logger.debug( + "Skipping '%s' (not in plugins.enabled)", lookup_key ) continue self._load_plugin(manifest) @@ -545,9 +630,37 @@ ) -> List[PluginManifest]: """Read ``plugin.yaml`` manifests from subdirectories of *path*. - *skip_names* is an optional allow-list of names to ignore (used - for the bundled scan to exclude ``memory`` / ``context_engine`` - subdirs that have their own discovery path). + Supports two layouts, mixed freely: + + * **Flat** — ``<plugins_dir>/<name>/plugin.yaml``. Key is + ``<name>`` (e.g. ``disk-cleanup``). + * **Category** — ``<plugins_dir>/<category>/<name>/plugin.yaml``, + where the ``<category>`` directory itself has no ``plugin.yaml``. + Key is ``<category>/<name>`` (e.g. ``image_gen/openai``). + Depth is capped at two segments. + + *skip_names* is an optional allow-list of names to ignore at the + top level (kept for back-compat; the current call sites no longer + pass it now that categories are first-class). + """ + return self._scan_directory_level( + path, source, skip_names=skip_names, prefix="", depth=0 + ) + + def _scan_directory_level( + self, + path: Path, + source: str, + *, + skip_names: Optional[Set[str]], + prefix: str, + depth: int, + ) -> List[PluginManifest]: + """Recursive implementation of :meth:`_scan_directory`. + + ``prefix`` is the category path already accumulated ("" at root, + "image_gen" one level in). ``depth`` is the recursion depth; we + cap at 2 so ``<plugins_dir>/a/b/c/`` is ignored.
""" manifests: List[PluginManifest] = [] if not path.is_dir(): @@ -556,37 +669,88 @@ class PluginManager: for child in sorted(path.iterdir()): if not child.is_dir(): continue - if skip_names and child.name in skip_names: + if depth == 0 and skip_names and child.name in skip_names: continue manifest_file = child / "plugin.yaml" if not manifest_file.exists(): manifest_file = child / "plugin.yml" - if not manifest_file.exists(): - logger.debug("Skipping %s (no plugin.yaml)", child) + + if manifest_file.exists(): + manifest = self._parse_manifest( + manifest_file, child, source, prefix + ) + if manifest is not None: + manifests.append(manifest) continue - try: - if yaml is None: - logger.warning("PyYAML not installed – cannot load %s", manifest_file) - continue - data = yaml.safe_load(manifest_file.read_text()) or {} - manifest = PluginManifest( - name=data.get("name", child.name), - version=str(data.get("version", "")), - description=data.get("description", ""), - author=data.get("author", ""), - requires_env=data.get("requires_env", []), - provides_tools=data.get("provides_tools", []), - provides_hooks=data.get("provides_hooks", []), - source=source, - path=str(child), + # No manifest at this level. If we're still within the depth + # cap, treat this directory as a category namespace and recurse + # one level in looking for children with manifests. + if depth >= 1: + logger.debug("Skipping %s (no plugin.yaml, depth cap reached)", child) + continue + + sub_prefix = f"{prefix}/{child.name}" if prefix else child.name + manifests.extend( + self._scan_directory_level( + child, + source, + skip_names=None, + prefix=sub_prefix, + depth=depth + 1, ) - manifests.append(manifest) - except Exception as exc: - logger.warning("Failed to parse %s: %s", manifest_file, exc) + ) return manifests + def _parse_manifest( + self, + manifest_file: Path, + plugin_dir: Path, + source: str, + prefix: str, + ) -> Optional[PluginManifest]: + """Parse a single ``plugin.yaml`` into a :class:`PluginManifest`. + + Returns ``None`` on parse failure (logs a warning). 
+ """ + try: + if yaml is None: + logger.warning("PyYAML not installed – cannot load %s", manifest_file) + return None + data = yaml.safe_load(manifest_file.read_text()) or {} + + name = data.get("name", plugin_dir.name) + key = f"{prefix}/{plugin_dir.name}" if prefix else name + + raw_kind = data.get("kind", "standalone") + if not isinstance(raw_kind, str): + raw_kind = "standalone" + kind = raw_kind.strip().lower() + if kind not in _VALID_PLUGIN_KINDS: + logger.warning( + "Plugin %s: unknown kind '%s' (valid: %s); treating as 'standalone'", + key, raw_kind, ", ".join(sorted(_VALID_PLUGIN_KINDS)), + ) + kind = "standalone" + + return PluginManifest( + name=name, + version=str(data.get("version", "")), + description=data.get("description", ""), + author=data.get("author", ""), + requires_env=data.get("requires_env", []), + provides_tools=data.get("provides_tools", []), + provides_hooks=data.get("provides_hooks", []), + source=source, + path=str(plugin_dir), + kind=kind, + key=key, + ) + except Exception as exc: + logger.warning("Failed to parse %s: %s", manifest_file, exc) + return None + # ----------------------------------------------------------------------- # Entry-point scanning # ----------------------------------------------------------------------- @@ -609,6 +773,7 @@ class PluginManager: name=ep.name, source="entrypoint", path=ep.value, + key=ep.name, ) manifests.append(manifest) except Exception as exc: @@ -670,10 +835,16 @@ class PluginManager: loaded.error = str(exc) logger.warning("Failed to load plugin '%s': %s", manifest.name, exc) - self._plugins[manifest.name] = loaded + self._plugins[manifest.key or manifest.name] = loaded def _load_directory_module(self, manifest: PluginManifest) -> types.ModuleType: - """Import a directory-based plugin as ``hermes_plugins.``.""" + """Import a directory-based plugin as ``hermes_plugins.``. + + The module slug is derived from ``manifest.key`` so category-namespaced + plugins (``image_gen/openai``) import as + ``hermes_plugins.image_gen__openai`` without colliding with any + future ``tts/openai``. + """ plugin_dir = Path(manifest.path) # type: ignore[arg-type] init_file = plugin_dir / "__init__.py" if not init_file.exists(): @@ -686,7 +857,9 @@ class PluginManager: ns_pkg.__package__ = _NS_PARENT sys.modules[_NS_PARENT] = ns_pkg - module_name = f"{_NS_PARENT}.{manifest.name.replace('-', '_')}" + key = manifest.key or manifest.name + slug = key.replace("/", "__").replace("-", "_") + module_name = f"{_NS_PARENT}.{slug}" spec = importlib.util.spec_from_file_location( module_name, init_file, @@ -767,10 +940,12 @@ class PluginManager: def list_plugins(self) -> List[Dict[str, Any]]: """Return a list of info dicts for all discovered plugins.""" result: List[Dict[str, Any]] = [] - for name, loaded in sorted(self._plugins.items()): + for key, loaded in sorted(self._plugins.items()): result.append( { - "name": name, + "name": loaded.manifest.name, + "key": loaded.manifest.key or loaded.manifest.name, + "kind": loaded.manifest.kind, "version": loaded.manifest.version, "description": loaded.manifest.description, "source": loaded.manifest.source, diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 1764474aa9..00c3f64bcf 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -427,6 +427,16 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: """ pdef = get_provider(provider) if pdef is not None: + # Even for known providers, check URL heuristics for special endpoints + # (e.g. 
kimi /coding endpoint needs anthropic_messages even on 'custom') + if base_url: + url_lower = base_url.rstrip("/").lower() + if "api.kimi.com/coding" in url_lower: + return "anthropic_messages" + if url_lower.endswith("/anthropic") or "api.anthropic.com" in url_lower: + return "anthropic_messages" + if "api.openai.com" in url_lower: + return "codex_responses" return TRANSPORT_TO_API_MODE.get(pdef.transport, "chat_completions") # Direct provider checks for providers not in HERMES_OVERLAYS @@ -439,6 +449,8 @@ def determine_api_mode(provider: str, base_url: str = "") -> str: hostname = base_url_hostname(base_url) if url_lower.endswith("/anthropic") or hostname == "api.anthropic.com": return "anthropic_messages" + if hostname == "api.kimi.com" and "/coding" in url_lower: + return "anthropic_messages" if hostname == "api.openai.com": return "codex_responses" if hostname.startswith("bedrock-runtime.") and base_url_host_matches(base_url, "amazonaws.com"): diff --git a/hermes_cli/runtime_provider.py b/hermes_cli/runtime_provider.py index 62f1407cc7..922946e2ad 100644 --- a/hermes_cli/runtime_provider.py +++ b/hermes_cli/runtime_provider.py @@ -46,6 +46,9 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]: protocol under a ``/anthropic`` suffix — treat those as ``anthropic_messages`` transport instead of the default ``chat_completions``. + - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the + Anthropic Messages protocol (the /coding route accepts Claude + Code's native request shape). """ normalized = (base_url or "").strip().lower().rstrip("/") hostname = base_url_hostname(base_url) @@ -55,6 +58,8 @@ def _detect_api_mode_for_url(base_url: str) -> Optional[str]: return "codex_responses" if normalized.endswith("/anthropic"): return "anthropic_messages" + if hostname == "api.kimi.com" and "/coding" in normalized: + return "anthropic_messages" return None @@ -205,7 +210,8 @@ def _resolve_runtime_from_pool_entry( api_mode = opencode_model_api_mode(provider, model_cfg.get("default", "")) else: # Auto-detect Anthropic-compatible endpoints (/anthropic suffix, - # api.openai.com → codex_responses, api.x.ai → codex_responses). + # Kimi /coding, api.openai.com → codex_responses, api.x.ai → + # codex_responses). detected = _detect_api_mode_for_url(base_url) if detected: api_mode = detected @@ -660,7 +666,8 @@ def _resolve_explicit_runtime( if configured_mode: api_mode = configured_mode else: - # Auto-detect Anthropic-compatible endpoints (/anthropic suffix). + # Auto-detect from URL (Anthropic /anthropic suffix, + # api.openai.com → Responses, Kimi /coding, etc.). detected = _detect_api_mode_for_url(base_url) if detected: api_mode = detected diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index d7eb7b734a..1a620d62b3 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -408,13 +408,36 @@ def _print_setup_summary(config: dict, hermes_home): ("Browser Automation", False, missing_browser_hint) ) - # FAL (image generation) + # Image generation — FAL (direct or via Nous), or any plugin-registered + # provider (OpenAI, etc.) if subscription_features.image_gen.managed_by_nous: tool_status.append(("Image Generation (Nous subscription)", True, None)) elif subscription_features.image_gen.available: tool_status.append(("Image Generation", True, None)) else: - tool_status.append(("Image Generation", False, "FAL_KEY")) + # Fall back to probing plugin-registered providers so OpenAI-only + # setups don't show as "missing FAL_KEY". 
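+ # Each registered provider self-reports readiness via is_available() + # (for the OpenAI backend: OPENAI_API_KEY present and the SDK importable), + # so the probe below simply takes the first non-FAL backend that reports ready.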
+ _img_backend = None + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for _p in list_providers(): + if _p.name == "fal": + continue + try: + if _p.is_available(): + _img_backend = _p.display_name + break + except Exception: + continue + except Exception: + pass + if _img_backend: + tool_status.append((f"Image Generation ({_img_backend})", True, None)) + else: + tool_status.append(("Image Generation", False, "FAL_KEY or OPENAI_API_KEY")) # TTS — show configured provider tts_provider = config.get("tts", {}).get("provider", "edge") diff --git a/hermes_cli/tips.py b/hermes_cli/tips.py index 71bace524a..24acc15f53 100644 --- a/hermes_cli/tips.py +++ b/hermes_cli/tips.py @@ -127,7 +127,7 @@ TIPS = [ # --- Tools & Capabilities --- "execute_code runs Python scripts that call Hermes tools programmatically — results stay out of context.", - "delegate_task spawns up to 3 concurrent sub-agents with isolated contexts for parallel work.", + "delegate_task spawns up to 3 concurrent sub-agents by default (configurable via delegation.max_concurrent_children) with isolated contexts for parallel work.", "web_extract works on PDF URLs — pass any PDF link and it converts to markdown.", "search_files is ripgrep-backed and faster than grep — use it instead of terminal grep.", "patch uses 9 fuzzy matching strategies so minor whitespace differences won't break edits.", diff --git a/hermes_cli/tools_config.py b/hermes_cli/tools_config.py index f91a0e037b..afc99e56c5 100644 --- a/hermes_cli/tools_config.py +++ b/hermes_cli/tools_config.py @@ -842,6 +842,51 @@ def _configure_toolset(ts_key: str, config: dict): _configure_simple_requirements(ts_key) +def _plugin_image_gen_providers() -> list[dict]: + """Build picker-row dicts from plugin-registered image gen providers. + + Each returned dict looks like a regular ``TOOL_CATEGORIES`` provider + row but carries an ``image_gen_plugin_name`` marker so downstream + code (config writing, model picker) knows to route through the + plugin registry instead of the in-tree FAL backend. + + FAL is skipped — it's already exposed by the hardcoded + ``TOOL_CATEGORIES["image_gen"]`` entries. When FAL gets ported to + a plugin in a follow-up PR, the hardcoded entries go away and this + function surfaces it alongside OpenAI automatically. + """ + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + providers = list_providers() + except Exception: + return [] + + rows: list[dict] = [] + for provider in providers: + if getattr(provider, "name", None) == "fal": + # FAL has its own hardcoded rows today. 
+ continue + try: + schema = provider.get_setup_schema() + except Exception: + continue + if not isinstance(schema, dict): + continue + rows.append( + { + "name": schema.get("name", provider.display_name), + "badge": schema.get("badge", ""), + "tag": schema.get("tag", ""), + "env_vars": schema.get("env_vars", []), + "image_gen_plugin_name": provider.name, + } + ) + return rows + + def _visible_providers(cat: dict, config: dict) -> list[dict]: """Return provider entries visible for the current auth/config state.""" features = get_nous_subscription_features(config) @@ -852,6 +897,12 @@ def _visible_providers(cat: dict, config: dict) -> list[dict]: if provider.get("requires_nous_auth") and not features.nous_auth_present: continue visible.append(provider) + + # Inject plugin-registered image_gen backends (OpenAI today, more + # later) so the picker lists them alongside FAL / Nous Subscription. + if cat.get("name") == "Image Generation": + visible.extend(_plugin_image_gen_providers()) + return visible @@ -871,7 +922,24 @@ def _toolset_needs_configuration_prompt(ts_key: str, config: dict) -> bool: browser_cfg = config.get("browser", {}) return not isinstance(browser_cfg, dict) or "cloud_provider" not in browser_cfg if ts_key == "image_gen": - return not fal_key_is_configured() + # Satisfied when the in-tree FAL backend is configured OR any + # plugin-registered image gen provider is available. + if fal_key_is_configured(): + return False + try: + from agent.image_gen_registry import list_providers + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + for provider in list_providers(): + try: + if provider.is_available(): + return False + except Exception: + continue + except Exception: + pass + return True return not _toolset_has_keys(ts_key, config) @@ -1096,6 +1164,88 @@ def _configure_imagegen_model(backend_name: str, config: dict) -> None: _print_success(f" Model set to: {chosen}") +def _plugin_image_gen_catalog(plugin_name: str): + """Return ``(catalog_dict, default_model_id)`` for a plugin provider. + + ``catalog_dict`` is shaped like the legacy ``FAL_MODELS`` table — + ``{model_id: {"display", "speed", "strengths", "price", ...}}`` — + so the existing picker code paths work without change. Returns + ``({}, None)`` if the provider isn't registered or has no models. + """ + try: + from agent.image_gen_registry import get_provider + from hermes_cli.plugins import _ensure_plugins_discovered + + _ensure_plugins_discovered() + provider = get_provider(plugin_name) + except Exception: + return {}, None + if provider is None: + return {}, None + try: + models = provider.list_models() or [] + default = provider.default_model() + except Exception: + return {}, None + catalog = {m["id"]: m for m in models if isinstance(m, dict) and "id" in m} + return catalog, default + + +def _configure_imagegen_model_for_plugin(plugin_name: str, config: dict) -> None: + """Prompt the user to pick a model for a plugin-registered backend. + + Writes selection to ``image_gen.model``. Mirrors + :func:`_configure_imagegen_model` but sources its catalog from the + plugin registry instead of :data:`IMAGEGEN_BACKENDS`. 
+ """ + catalog, default_model = _plugin_image_gen_catalog(plugin_name) + if not catalog: + return + + cur_cfg = config.setdefault("image_gen", {}) + if not isinstance(cur_cfg, dict): + cur_cfg = {} + config["image_gen"] = cur_cfg + current_model = cur_cfg.get("model") or default_model + if current_model not in catalog: + current_model = default_model + + model_ids = list(catalog.keys()) + ordered = [current_model] + [m for m in model_ids if m != current_model] + + widths = { + "model": max(len(m) for m in model_ids), + "speed": max((len(catalog[m].get("speed", "")) for m in model_ids), default=6), + "strengths": max((len(catalog[m].get("strengths", "")) for m in model_ids), default=0), + } + + print() + header = ( + f" {'Model':<{widths['model']}} " + f"{'Speed':<{widths['speed']}} " + f"{'Strengths':<{widths['strengths']}} " + f"Price" + ) + print(color(header, Colors.CYAN)) + + rows = [] + for mid in ordered: + row = _format_imagegen_model_row(mid, catalog[mid], widths) + if mid == current_model: + row += " ← currently in use" + rows.append(row) + + idx = _prompt_choice( + f" Choose {plugin_name} model:", + rows, + default=0, + ) + + chosen = ordered[idx] + cur_cfg["model"] = chosen + _print_success(f" Model set to: {chosen}") + + def _configure_provider(provider: dict, config: dict): """Configure a single provider - prompt for API keys and set config.""" env_vars = provider.get("env_vars", []) @@ -1152,10 +1302,28 @@ def _configure_provider(provider: dict, config: dict): _print_success(f" {provider['name']} - no configuration needed!") if managed_feature: _print_info(" Requests for this tool will be billed to your Nous subscription.") + # Plugin-registered image_gen provider: write image_gen.provider + # and route model selection to the plugin's own catalog. + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after backend pick. backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + # In-tree FAL is the only non-plugin backend today. Keep + # image_gen.provider clear so the dispatch shim falls through + # to the legacy FAL path. + img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" return # Prompt for each required env var @@ -1190,10 +1358,23 @@ def _configure_provider(provider: dict, config: dict): if all_configured: _print_success(f" {provider['name']} configured!") + plugin_name = provider.get("image_gen_plugin_name") + if plugin_name: + img_cfg = config.setdefault("image_gen", {}) + if not isinstance(img_cfg, dict): + img_cfg = {} + config["image_gen"] = img_cfg + img_cfg["provider"] = plugin_name + _print_success(f" image_gen.provider set to: {plugin_name}") + _configure_imagegen_model_for_plugin(plugin_name, config) + return # Imagegen backends prompt for model selection after env vars are in. 
backend = provider.get("imagegen_backend") if backend: _configure_imagegen_model(backend, config) + img_cfg = config.setdefault("image_gen", {}) + if isinstance(img_cfg, dict) and img_cfg.get("provider") not in (None, "", "fal"): + img_cfg["provider"] = "fal" def _configure_simple_requirements(ts_key: str): diff --git a/hermes_cli/web_server.py b/hermes_cli/web_server.py index 6cf1199253..784dc4834d 100644 --- a/hermes_cli/web_server.py +++ b/hermes_cli/web_server.py @@ -16,6 +16,7 @@ import json import logging import os import secrets +import subprocess import sys import threading import time @@ -561,6 +562,138 @@ async def get_status(): } + +# --------------------------------------------------------------------------- +# Gateway + update actions (invoked from the Status page). +# +# Both commands are spawned as detached subprocesses so the HTTP request +# returns immediately. stdin is closed (``DEVNULL``) so any stray ``input()`` +# calls fail fast with EOF rather than hanging forever. stdout/stderr are +# streamed to a per-action log file under ``~/.hermes/logs/<name>.log`` so +# the dashboard can tail them back to the user. +# --------------------------------------------------------------------------- + +_ACTION_LOG_DIR: Path = get_hermes_home() / "logs" + +# Short ``name`` (from the URL) → log file name, resolved under ``_ACTION_LOG_DIR``. +_ACTION_LOG_FILES: Dict[str, str] = { + "gateway-restart": "gateway-restart.log", + "hermes-update": "hermes-update.log", +} + +# ``name`` → most recently spawned Popen handle. Used so ``status`` can +# report liveness and exit code without shelling out to ``ps``. +_ACTION_PROCS: Dict[str, subprocess.Popen] = {} + + +def _spawn_hermes_action(subcommand: List[str], name: str) -> subprocess.Popen: + """Spawn ``hermes <subcommand>`` detached and record the Popen handle. + + Uses the running interpreter's ``hermes_cli.main`` module so the action + inherits the same venv/PYTHONPATH the web server is using. + """ + log_file_name = _ACTION_LOG_FILES[name] + _ACTION_LOG_DIR.mkdir(parents=True, exist_ok=True) + log_path = _ACTION_LOG_DIR / log_file_name + log_file = open(log_path, "ab", buffering=0) + log_file.write( + f"\n=== {name} started {time.strftime('%Y-%m-%d %H:%M:%S')} ===\n".encode() + ) + + cmd = [sys.executable, "-m", "hermes_cli.main", *subcommand] + + popen_kwargs: Dict[str, Any] = { + "cwd": str(PROJECT_ROOT), + "stdin": subprocess.DEVNULL, + "stdout": log_file, + "stderr": subprocess.STDOUT, + "env": {**os.environ, "HERMES_NONINTERACTIVE": "1"}, + } + if sys.platform == "win32": + popen_kwargs["creationflags"] = ( + subprocess.CREATE_NEW_PROCESS_GROUP # type: ignore[attr-defined] + | getattr(subprocess, "DETACHED_PROCESS", 0) + ) + else: + popen_kwargs["start_new_session"] = True + + proc = subprocess.Popen(cmd, **popen_kwargs) + _ACTION_PROCS[name] = proc + return proc + + +def _tail_lines(path: Path, n: int) -> List[str]: + """Return the last ``n`` lines of ``path``. Reads the whole file — fine + for our small per-action logs.
Binary-decoded with ``errors='replace'`` + so log corruption doesn't 500 the endpoint.""" + if not path.exists(): + return [] + try: + text = path.read_text(errors="replace") + except OSError: + return [] + lines = text.splitlines() + return lines[-n:] if n > 0 else lines + + +@app.post("/api/gateway/restart") +async def restart_gateway(): + """Kick off a ``hermes gateway restart`` in the background.""" + try: + proc = _spawn_hermes_action(["gateway", "restart"], "gateway-restart") + except Exception as exc: + _log.exception("Failed to spawn gateway restart") + raise HTTPException(status_code=500, detail=f"Failed to restart gateway: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "gateway-restart", + } + + +@app.post("/api/hermes/update") +async def update_hermes(): + """Kick off ``hermes update`` in the background.""" + try: + proc = _spawn_hermes_action(["update"], "hermes-update") + except Exception as exc: + _log.exception("Failed to spawn hermes update") + raise HTTPException(status_code=500, detail=f"Failed to start update: {exc}") + return { + "ok": True, + "pid": proc.pid, + "name": "hermes-update", + } + + +@app.get("/api/actions/{name}/status") +async def get_action_status(name: str, lines: int = 200): + """Tail an action log and report whether the process is still running.""" + log_file_name = _ACTION_LOG_FILES.get(name) + if log_file_name is None: + raise HTTPException(status_code=404, detail=f"Unknown action: {name}") + + log_path = _ACTION_LOG_DIR / log_file_name + tail = _tail_lines(log_path, min(max(lines, 1), 2000)) + + proc = _ACTION_PROCS.get(name) + if proc is None: + running = False + exit_code: Optional[int] = None + pid: Optional[int] = None + else: + exit_code = proc.poll() + running = exit_code is None + pid = proc.pid + + return { + "name": name, + "running": running, + "exit_code": exit_code, + "pid": pid, + "lines": tail, + } + + @app.get("/api/sessions") async def get_sessions(limit: int = 20, offset: int = 0): try: diff --git a/package-lock.json b/package-lock.json index 9d0ae80cdc..728429e51b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1069,6 +1069,7 @@ } ], "license": "MIT", + "peer": true, "dependencies": { "baseline-browser-mapping": "^2.10.12", "caniuse-lite": "^1.0.30001782", @@ -3911,6 +3912,7 @@ "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.59.1.tgz", "integrity": "sha512-C8oWjPR3F81yljW9o5OxcWzfh6avkVwDD2VYdwIGqTkl+OGFISgypqzfu7dOe4QNLL2aqcWBmI3PMtLIK233lw==", "license": "Apache-2.0", + "peer": true, "dependencies": { "playwright-core": "1.59.1" }, @@ -3929,6 +3931,7 @@ "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.59.1.tgz", "integrity": "sha512-HBV/RJg81z5BiiZ9yPzIiClYV/QMsDCKUyogwH9p3MCP6IYjUFu/MActgYAvK0oWyV9NlwM3GLBjADyWgydVyg==", "license": "Apache-2.0", + "peer": true, "bin": { "playwright-core": "cli.js" }, diff --git a/plugins/image_gen/openai/__init__.py b/plugins/image_gen/openai/__init__.py new file mode 100644 index 0000000000..c1a719f910 --- /dev/null +++ b/plugins/image_gen/openai/__init__.py @@ -0,0 +1,303 @@ +"""OpenAI image generation backend. + +Exposes OpenAI's ``gpt-image-2`` model at three quality tiers as an +:class:`ImageGenProvider` implementation. 
The tiers are implemented as +three virtual model IDs so the ``hermes tools`` model picker and the +``image_gen.model`` config key behave like any other multi-model backend: + + gpt-image-2-low ~15s fastest, good for iteration + gpt-image-2-medium ~40s default — balanced + gpt-image-2-high ~2min slowest, highest fidelity + +All three hit the same underlying API model (``gpt-image-2``) with a +different ``quality`` parameter. Output is base64 JSON → saved under +``$HERMES_HOME/cache/images/``. + +Selection precedence (first hit wins): + +1. ``OPENAI_IMAGE_MODEL`` env var (escape hatch for scripts / tests) +2. ``image_gen.openai.model`` in ``config.yaml`` +3. ``image_gen.model`` in ``config.yaml`` (when it's one of our tier IDs) +4. :data:`DEFAULT_MODEL` — ``gpt-image-2-medium`` +""" + +from __future__ import annotations + +import logging +import os +from typing import Any, Dict, List, Optional, Tuple + +from agent.image_gen_provider import ( + DEFAULT_ASPECT_RATIO, + ImageGenProvider, + error_response, + resolve_aspect_ratio, + save_b64_image, + success_response, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Model catalog +# --------------------------------------------------------------------------- +# +# All three IDs resolve to the same underlying API model with a different +# ``quality`` setting. ``api_model`` is what gets sent to OpenAI; +# ``quality`` is the knob that changes generation time and output fidelity. + +API_MODEL = "gpt-image-2" + +_MODELS: Dict[str, Dict[str, Any]] = { + "gpt-image-2-low": { + "display": "GPT Image 2 (Low)", + "speed": "~15s", + "strengths": "Fast iteration, lowest cost", + "quality": "low", + }, + "gpt-image-2-medium": { + "display": "GPT Image 2 (Medium)", + "speed": "~40s", + "strengths": "Balanced — default", + "quality": "medium", + }, + "gpt-image-2-high": { + "display": "GPT Image 2 (High)", + "speed": "~2min", + "strengths": "Highest fidelity, strongest prompt adherence", + "quality": "high", + }, +} + +DEFAULT_MODEL = "gpt-image-2-medium" + +_SIZES = { + "landscape": "1536x1024", + "square": "1024x1024", + "portrait": "1024x1536", +} + + +def _load_openai_config() -> Dict[str, Any]: + """Read ``image_gen`` from config.yaml (returns {} on any failure).""" + try: + from hermes_cli.config import load_config + + cfg = load_config() + section = cfg.get("image_gen") if isinstance(cfg, dict) else None + return section if isinstance(section, dict) else {} + except Exception as exc: + logger.debug("Could not load image_gen config: %s", exc) + return {} + + +def _resolve_model() -> Tuple[str, Dict[str, Any]]: + """Decide which tier to use and return ``(model_id, meta)``.""" + env_override = os.environ.get("OPENAI_IMAGE_MODEL") + if env_override and env_override in _MODELS: + return env_override, _MODELS[env_override] + + cfg = _load_openai_config() + openai_cfg = cfg.get("openai") if isinstance(cfg.get("openai"), dict) else {} + candidate: Optional[str] = None + if isinstance(openai_cfg, dict): + value = openai_cfg.get("model") + if isinstance(value, str) and value in _MODELS: + candidate = value + if candidate is None: + top = cfg.get("model") + if isinstance(top, str) and top in _MODELS: + candidate = top + + if candidate is not None: + return candidate, _MODELS[candidate] + + return DEFAULT_MODEL, _MODELS[DEFAULT_MODEL] + + +# --------------------------------------------------------------------------- +# Provider +# 
--------------------------------------------------------------------------- + + +class OpenAIImageGenProvider(ImageGenProvider): + """OpenAI ``images.generate`` backend — gpt-image-2 at low/medium/high.""" + + @property + def name(self) -> str: + return "openai" + + @property + def display_name(self) -> str: + return "OpenAI" + + def is_available(self) -> bool: + if not os.environ.get("OPENAI_API_KEY"): + return False + try: + import openai # noqa: F401 + except ImportError: + return False + return True + + def list_models(self) -> List[Dict[str, Any]]: + return [ + { + "id": model_id, + "display": meta["display"], + "speed": meta["speed"], + "strengths": meta["strengths"], + "price": "varies", + } + for model_id, meta in _MODELS.items() + ] + + def default_model(self) -> Optional[str]: + return DEFAULT_MODEL + + def get_setup_schema(self) -> Dict[str, Any]: + return { + "name": "OpenAI", + "badge": "paid", + "tag": "gpt-image-2 at low/medium/high quality tiers", + "env_vars": [ + { + "key": "OPENAI_API_KEY", + "prompt": "OpenAI API key", + "url": "https://platform.openai.com/api-keys", + }, + ], + } + + def generate( + self, + prompt: str, + aspect_ratio: str = DEFAULT_ASPECT_RATIO, + **kwargs: Any, + ) -> Dict[str, Any]: + prompt = (prompt or "").strip() + aspect = resolve_aspect_ratio(aspect_ratio) + + if not prompt: + return error_response( + error="Prompt is required and must be a non-empty string", + error_type="invalid_argument", + provider="openai", + aspect_ratio=aspect, + ) + + if not os.environ.get("OPENAI_API_KEY"): + return error_response( + error=( + "OPENAI_API_KEY not set. Run `hermes tools` → Image " + "Generation → OpenAI to configure, or `hermes setup` " + "to add the key." + ), + error_type="auth_required", + provider="openai", + aspect_ratio=aspect, + ) + + try: + import openai + except ImportError: + return error_response( + error="openai Python package not installed (pip install openai)", + error_type="missing_dependency", + provider="openai", + aspect_ratio=aspect, + ) + + tier_id, meta = _resolve_model() + size = _SIZES.get(aspect, _SIZES["square"]) + + # gpt-image-2 returns b64_json unconditionally and REJECTS + # ``response_format`` as an unknown parameter. Don't send it. 
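+        # A minimal sketch of the call this payload drives (illustrative
+        # values only — the real quality comes from meta["quality"] and the
+        # size from the aspect mapping above):
+        #
+        #     client = openai.OpenAI()
+        #     rsp = client.images.generate(model="gpt-image-2", prompt="a red fox",
+        #                                  size="1024x1024", n=1, quality="medium")
+        #     b64 = rsp.data[0].b64_json  # base64 today; the url branch below is defensive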
+ payload: Dict[str, Any] = { + "model": API_MODEL, + "prompt": prompt, + "size": size, + "n": 1, + "quality": meta["quality"], + } + + try: + client = openai.OpenAI() + response = client.images.generate(**payload) + except Exception as exc: + logger.debug("OpenAI image generation failed", exc_info=True) + return error_response( + error=f"OpenAI image generation failed: {exc}", + error_type="api_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + data = getattr(response, "data", None) or [] + if not data: + return error_response( + error="OpenAI returned no image data", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + first = data[0] + b64 = getattr(first, "b64_json", None) + url = getattr(first, "url", None) + revised_prompt = getattr(first, "revised_prompt", None) + + if b64: + try: + saved_path = save_b64_image(b64, prefix=f"openai_{tier_id}") + except Exception as exc: + return error_response( + error=f"Could not save image to cache: {exc}", + error_type="io_error", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + image_ref = str(saved_path) + elif url: + # Defensive — gpt-image-2 returns b64 today, but fall back + # gracefully if the API ever changes. + image_ref = url + else: + return error_response( + error="OpenAI response contained neither b64_json nor URL", + error_type="empty_response", + provider="openai", + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + ) + + extra: Dict[str, Any] = {"size": size, "quality": meta["quality"]} + if revised_prompt: + extra["revised_prompt"] = revised_prompt + + return success_response( + image=image_ref, + model=tier_id, + prompt=prompt, + aspect_ratio=aspect, + provider="openai", + extra=extra, + ) + + +# --------------------------------------------------------------------------- +# Plugin entry point +# --------------------------------------------------------------------------- + + +def register(ctx) -> None: + """Plugin entry point — wire ``OpenAIImageGenProvider`` into the registry.""" + ctx.register_image_gen_provider(OpenAIImageGenProvider()) diff --git a/plugins/image_gen/openai/plugin.yaml b/plugins/image_gen/openai/plugin.yaml new file mode 100644 index 0000000000..18e4d86390 --- /dev/null +++ b/plugins/image_gen/openai/plugin.yaml @@ -0,0 +1,7 @@ +name: openai +version: 1.0.0 +description: "OpenAI image generation backend (gpt-image-2). Saves generated images to $HERMES_HOME/cache/images/." +author: NousResearch +kind: backend +requires_env: + - OPENAI_API_KEY diff --git a/run_agent.py b/run_agent.py index a5d22f75c4..8179a71546 100644 --- a/run_agent.py +++ b/run_agent.py @@ -1178,7 +1178,7 @@ class AIAgent: client_kwargs["default_headers"] = copilot_default_headers() elif base_url_host_matches(effective_base, "api.kimi.com"): client_kwargs["default_headers"] = { - "User-Agent": "KimiCLI/1.30.0", + "User-Agent": "claude-code/0.1.0", } elif base_url_host_matches(effective_base, "portal.qwen.ai"): client_kwargs["default_headers"] = _qwen_portal_headers() @@ -2008,6 +2008,22 @@ class AIAgent: self._fallback_activated = False self._fallback_index = 0 + # When the user deliberately swaps primary providers (e.g. openrouter + # → anthropic), drop any fallback entries that target the OLD primary + # or the NEW one. 
The chain was seeded from config at agent init for + # the original provider — without pruning, a failed turn on the new + # primary silently re-activates the provider the user just rejected, + # which is exactly what was reported during TUI v2 blitz testing + # ("switched to anthropic, tui keeps trying openrouter"). + old_norm = (old_provider or "").strip().lower() + new_norm = (new_provider or "").strip().lower() + if old_norm and new_norm and old_norm != new_norm: + self._fallback_chain = [ + entry for entry in self._fallback_chain + if (entry.get("provider") or "").strip().lower() not in {old_norm, new_norm} + ] + self._fallback_model = self._fallback_chain[0] if self._fallback_chain else None + logging.info( "Model switched in-place: %s (%s) -> %s (%s)", old_model, old_provider, new_model, new_provider, @@ -4295,10 +4311,6 @@ class AIAgent: if self._memory_store: self._memory_store.load_from_disk() - def _responses_tools(self, tools: Optional[List[Dict[str, Any]]] = None) -> Optional[List[Dict[str, Any]]]: - """Convert chat-completions tool schemas to Responses function-tool schemas.""" - return _codex_responses_tools(tools if tools is not None else self.tools) - @staticmethod def _deterministic_call_id(fn_name: str, arguments: str, index: int = 0) -> str: """Generate a deterministic call_id from tool call content. @@ -4322,33 +4334,6 @@ class AIAgent: """Build a valid Responses `function_call.id` (must start with `fc_`).""" return _codex_derive_responses_function_call_id(call_id, response_item_id) - def _chat_messages_to_responses_input(self, messages: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Convert internal chat-style messages to Responses input items.""" - return _codex_chat_messages_to_responses_input(messages) - - def _preflight_codex_input_items(self, raw_items: Any) -> List[Dict[str, Any]]: - return _codex_preflight_codex_input_items(raw_items) - - def _preflight_codex_api_kwargs( - self, - api_kwargs: Any, - *, - allow_stream: bool = False, - ) -> Dict[str, Any]: - return _codex_preflight_codex_api_kwargs(api_kwargs, allow_stream=allow_stream) - - def _extract_responses_message_text(self, item: Any) -> str: - """Extract assistant text from a Responses message output item.""" - return _codex_extract_responses_message_text(item) - - def _extract_responses_reasoning_text(self, item: Any) -> str: - """Extract a compact reasoning text from a Responses reasoning item.""" - return _codex_extract_responses_reasoning_text(item) - - def _normalize_codex_response(self, response: Any) -> tuple[Any, str]: - """Normalize a Responses API object to an assistant_message-like object.""" - return _codex_normalize_codex_response(response) - def _thread_identity(self) -> str: thread = threading.current_thread() return f"{thread.name}:{thread.ident}" @@ -4841,7 +4826,7 @@ class AIAgent: active_client = client or self._ensure_primary_openai_client(reason="codex_create_stream_fallback") fallback_kwargs = dict(api_kwargs) fallback_kwargs["stream"] = True - fallback_kwargs = self._preflight_codex_api_kwargs(fallback_kwargs, allow_stream=True) + fallback_kwargs = self._get_codex_transport().preflight_kwargs(fallback_kwargs, allow_stream=True) stream_or_response = active_client.responses.create(**fallback_kwargs) # Compatibility shim for mocks or providers that still return a concrete response. 
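As a sanity check on the fallback-chain pruning above, here is a minimal self-contained sketch (provider names are hypothetical; the filter mirrors the comprehension the hunk adds):

```python
chain = [
    {"provider": "openrouter", "model": "gpt-5"},
    {"provider": "anthropic", "model": "claude-sonnet"},
    {"provider": "nous", "model": "hermes-4"},
]
old_norm, new_norm = "openrouter", "anthropic"  # user swapped openrouter -> anthropic

# Drop entries targeting either the old or the new primary.
pruned = [
    entry for entry in chain
    if (entry.get("provider") or "").strip().lower() not in {old_norm, new_norm}
]

assert pruned == [{"provider": "nous", "model": "hermes-4"}]  # only the third party survives
```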
@@ -5036,7 +5021,7 @@ class AIAgent: self._client_kwargs["default_headers"] = copilot_default_headers() elif base_url_host_matches(base_url, "api.kimi.com"): - self._client_kwargs["default_headers"] = {"User-Agent": "KimiCLI/1.30.0"} + self._client_kwargs["default_headers"] = {"User-Agent": "claude-code/0.1.0"} elif base_url_host_matches(base_url, "portal.qwen.ai"): self._client_kwargs["default_headers"] = _qwen_portal_headers() elif base_url_host_matches(base_url, "chatgpt.com"): @@ -6583,6 +6568,33 @@ class AIAgent: self._anthropic_transport = t return t + def _get_codex_transport(self): + """Return the cached ResponsesApiTransport instance (lazy singleton).""" + t = getattr(self, "_codex_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("codex_responses") + self._codex_transport = t + return t + + def _get_chat_completions_transport(self): + """Return the cached ChatCompletionsTransport instance (lazy singleton).""" + t = getattr(self, "_chat_completions_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("chat_completions") + self._chat_completions_transport = t + return t + + def _get_bedrock_transport(self): + """Return the cached BedrockTransport instance (lazy singleton).""" + t = getattr(self, "_bedrock_transport", None) + if t is None: + from agent.transports import get_transport + t = get_transport("bedrock_converse") + self._bedrock_transport = t + return t + def _prepare_anthropic_messages_for_api(self, api_messages: list) -> list: if not any( isinstance(msg, dict) and self._content_has_image_parts(msg.get("content")) @@ -6722,31 +6734,20 @@ class AIAgent: # AWS Bedrock native Converse API — bypasses the OpenAI client entirely. # The adapter handles message/tool conversion and boto3 calls directly. 
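        # Rough shape of what the transport returns (the two dunder keys are
        # taken from the inline construction this hunk replaces; the Converse
        # field names are assumptions about the adapter's output, not verified
        # here):
        #   {"__bedrock_converse__": True, "__bedrock_region__": "us-east-1",
        #    "modelId": ..., "messages": [...], "toolConfig": {...}}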
if self.api_mode == "bedrock_converse": - from agent.bedrock_adapter import build_converse_kwargs + _bt = self._get_bedrock_transport() region = getattr(self, "_bedrock_region", None) or "us-east-1" guardrail = getattr(self, "_bedrock_guardrail_config", None) - return { - "__bedrock_converse__": True, - "__bedrock_region__": region, - **build_converse_kwargs( - model=self.model, - messages=api_messages, - tools=self.tools, - max_tokens=self.max_tokens or 4096, - temperature=None, # Let the model use its default - guardrail_config=guardrail, - ), - } + return _bt.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + max_tokens=self.max_tokens or 4096, + region=region, + guardrail_config=guardrail, + ) if self.api_mode == "codex_responses": - instructions = "" - payload_messages = api_messages - if api_messages and api_messages[0].get("role") == "system": - instructions = str(api_messages[0].get("content") or "").strip() - payload_messages = api_messages[1:] - if not instructions: - instructions = DEFAULT_AGENT_IDENTITY - + _ct = self._get_codex_transport() is_github_responses = ( base_url_host_matches(self.base_url, "models.github.ai") or base_url_host_matches(self.base_url, "api.githubcopilot.com") @@ -6758,320 +6759,118 @@ class AIAgent: and "/backend-api/codex" in self._base_url_lower ) ) - - # Resolve reasoning effort: config > default (medium) - reasoning_effort = "medium" - reasoning_enabled = True - if self.reasoning_config and isinstance(self.reasoning_config, dict): - if self.reasoning_config.get("enabled") is False: - reasoning_enabled = False - elif self.reasoning_config.get("effort"): - reasoning_effort = self.reasoning_config["effort"] - - # Clamp effort levels not supported by the Responses API model. - # GPT-5.4 supports none/low/medium/high/xhigh but not "minimal". - # "minimal" is valid on OpenRouter and GPT-5 but fails on 5.2/5.4. - _effort_clamp = {"minimal": "low"} - reasoning_effort = _effort_clamp.get(reasoning_effort, reasoning_effort) - - kwargs = { - "model": self.model, - "instructions": instructions, - "input": self._chat_messages_to_responses_input(payload_messages), - "tools": self._responses_tools(), - "tool_choice": "auto", - "parallel_tool_calls": True, - "store": False, - } - - if not is_github_responses: - kwargs["prompt_cache_key"] = self.session_id - is_xai_responses = self.provider == "xai" or self._base_url_hostname == "api.x.ai" - - if reasoning_enabled and is_xai_responses: - # xAI reasons automatically — no effort param, just include encrypted content - kwargs["include"] = ["reasoning.encrypted_content"] - elif reasoning_enabled: - if is_github_responses: - # Copilot's Responses route advertises reasoning-effort support, - # but not OpenAI-specific prompt cache or encrypted reasoning - # fields. Keep the payload to the documented subset. 
- github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - kwargs["reasoning"] = github_reasoning - else: - kwargs["reasoning"] = {"effort": reasoning_effort, "summary": "auto"} - kwargs["include"] = ["reasoning.encrypted_content"] - elif not is_github_responses and not is_xai_responses: - kwargs["include"] = [] - - if self.request_overrides: - kwargs.update(self.request_overrides) - - if self.max_tokens is not None and not is_codex_backend: - kwargs["max_output_tokens"] = self.max_tokens - - if is_xai_responses and getattr(self, "session_id", None): - kwargs["extra_headers"] = {"x-grok-conv-id": self.session_id} - - return kwargs - - sanitized_messages = api_messages - needs_sanitization = False - for msg in api_messages: - if not isinstance(msg, dict): - continue - if "codex_reasoning_items" in msg: - needs_sanitization = True - break - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if not isinstance(tool_call, dict): - continue - if "call_id" in tool_call or "response_item_id" in tool_call: - needs_sanitization = True - break - if needs_sanitization: - break - - if needs_sanitization: - sanitized_messages = copy.deepcopy(api_messages) - for msg in sanitized_messages: - if not isinstance(msg, dict): - continue - - # Codex-only replay state must not leak into strict chat-completions APIs. - msg.pop("codex_reasoning_items", None) - - tool_calls = msg.get("tool_calls") - if isinstance(tool_calls, list): - for tool_call in tool_calls: - if isinstance(tool_call, dict): - tool_call.pop("call_id", None) - tool_call.pop("response_item_id", None) - - # Qwen portal: normalize content to list-of-dicts, inject cache_control. - # Must run AFTER codex sanitization so we transform the final messages. - # If sanitization already deepcopied, reuse that copy (in-place). - if self._is_qwen_portal(): - if sanitized_messages is api_messages: - # No sanitization was done — we need our own copy. - sanitized_messages = self._qwen_prepare_chat_messages(sanitized_messages) - else: - # Already a deepcopy — transform in place to avoid a second deepcopy. - self._qwen_prepare_chat_messages_inplace(sanitized_messages) - - # GPT-5 and Codex models respond better to 'developer' than 'system' - # for instruction-following. Swap the role at the API boundary so - # internal message representation stays uniform ("system"). - _model_lower = (self.model or "").lower() - if ( - sanitized_messages - and sanitized_messages[0].get("role") == "system" - and any(p in _model_lower for p in DEVELOPER_ROLE_MODELS) - ): - # Shallow-copy the list + first message only — rest stays shared. 
- sanitized_messages = list(sanitized_messages) - sanitized_messages[0] = {**sanitized_messages[0], "role": "developer"} - - provider_preferences = {} - if self.providers_allowed: - provider_preferences["only"] = self.providers_allowed - if self.providers_ignored: - provider_preferences["ignore"] = self.providers_ignored - if self.providers_order: - provider_preferences["order"] = self.providers_order - if self.provider_sort: - provider_preferences["sort"] = self.provider_sort - if self.provider_require_parameters: - provider_preferences["require_parameters"] = True - if self.provider_data_collection: - provider_preferences["data_collection"] = self.provider_data_collection - - api_kwargs = { - "model": self.model, - "messages": sanitized_messages, - "timeout": self._resolved_api_call_timeout(), - } - try: - from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE - except Exception: - _fixed_temperature_for_model = None - OMIT_TEMPERATURE = None - if _fixed_temperature_for_model is not None: - fixed_temperature = _fixed_temperature_for_model(self.model, self.base_url) - if fixed_temperature is OMIT_TEMPERATURE: - api_kwargs.pop("temperature", None) - elif fixed_temperature is not None: - api_kwargs["temperature"] = fixed_temperature - if self._is_qwen_portal(): - api_kwargs["metadata"] = { - "sessionId": self.session_id or "hermes", - "promptId": str(uuid.uuid4()), - } - if self.tools: - api_kwargs["tools"] = self.tools - - # ── max_tokens for chat_completions ────────────────────────────── - # Priority: ephemeral override (error recovery / length-continuation - # boost) > user-configured max_tokens > provider-specific defaults. - _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) - if _ephemeral_out is not None: - self._ephemeral_max_output_tokens = None # consume immediately - api_kwargs.update(self._max_tokens_param(_ephemeral_out)) - elif self.max_tokens is not None: - api_kwargs.update(self._max_tokens_param(self.max_tokens)) - elif "integrate.api.nvidia.com" in self._base_url_lower: - # NVIDIA NIM defaults to a very low max_tokens when omitted, - # causing models like GLM-4.7 to truncate immediately (thinking - # tokens alone exhaust the budget). 16384 provides adequate room. - api_kwargs.update(self._max_tokens_param(16384)) - elif self._is_qwen_portal(): - # Qwen Portal defaults to a very low max_tokens when omitted. - # Reasoning models (qwen3-coder-plus) exhaust that budget on - # thinking tokens alone, causing the portal to return - # finish_reason="stop" with truncated output — the agent sees - # this as an intentional stop and exits the loop. Send 65536 - # (the documented max output for qwen3-coder models) so the - # model has adequate output budget for tool calls. - api_kwargs.update(self._max_tokens_param(65536)) - elif ( - base_url_host_matches(self.base_url, "api.kimi.com") - or base_url_host_matches(self.base_url, "moonshot.ai") - or base_url_host_matches(self.base_url, "moonshot.cn") - ): - # Kimi/Moonshot defaults to a low max_tokens when omitted. - # Reasoning tokens share the output budget — without an explicit - # value the model can exhaust it on thinking alone, causing - # "Response truncated due to output length limit". 32000 matches - # Kimi CLI's default (see MoonshotAI/kimi-cli kimi.py generate()). - api_kwargs.update(self._max_tokens_param(32000)) - # Kimi requires reasoning_effort as a top-level chat completions - # parameter (not inside extra_body). 
Mirror Kimi CLI's - # with_generation_kwargs(reasoning_effort=...) / with_thinking(): - # when thinking is disabled, Kimi CLI omits reasoning_effort - # entirely (maps to None). - _kimi_thinking_off = bool( - self.reasoning_config - and isinstance(self.reasoning_config, dict) - and self.reasoning_config.get("enabled") is False + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + reasoning_config=self.reasoning_config, + session_id=getattr(self, "session_id", None), + max_tokens=self.max_tokens, + request_overrides=self.request_overrides, + is_github_responses=is_github_responses, + is_codex_backend=is_codex_backend, + is_xai_responses=is_xai_responses, + github_reasoning_extra=self._github_models_reasoning_extra_body() if is_github_responses else None, ) - if not _kimi_thinking_off: - _kimi_effort = "medium" - if self.reasoning_config and isinstance(self.reasoning_config, dict): - _e = (self.reasoning_config.get("effort") or "").strip().lower() - if _e in ("low", "medium", "high"): - _kimi_effort = _e - api_kwargs["reasoning_effort"] = _kimi_effort - elif (self._is_openrouter_url() or "nousresearch" in self._base_url_lower) and "claude" in (self.model or "").lower(): - # OpenRouter and Nous Portal translate requests to Anthropic's - # Messages API, which requires max_tokens as a mandatory field. - # When we omit it, the proxy picks a default that can be too - # low — the model spends its output budget on thinking and has - # almost nothing left for the actual response (especially large - # tool calls like write_file). Sending the model's real output - # limit ensures full capacity. - try: - from agent.anthropic_adapter import _get_anthropic_max_output - _model_output_limit = _get_anthropic_max_output(self.model) - api_kwargs["max_tokens"] = _model_output_limit - except Exception: - pass # fail open — let the proxy pick its default - extra_body = {} + # ── chat_completions (default) ───────────────────────────────────── + _ct = self._get_chat_completions_transport() - _is_openrouter = self._is_openrouter_url() - _is_github_models = ( + # Provider detection flags + _is_qwen = self._is_qwen_portal() + _is_or = self._is_openrouter_url() + _is_gh = ( base_url_host_matches(self._base_url_lower, "models.github.ai") or base_url_host_matches(self._base_url_lower, "api.githubcopilot.com") ) - - # Provider preferences (only, ignore, order, sort) are OpenRouter- - # specific. Only send to OpenRouter-compatible endpoints. - # TODO: Nous Portal will add transparent proxy support — re-enable - # for _is_nous when their backend is updated. - if provider_preferences and _is_openrouter: - extra_body["provider"] = provider_preferences _is_nous = "nousresearch" in self._base_url_lower - - # Kimi/Moonshot API uses extra_body.thinking (separate from the - # top-level reasoning_effort) to enable/disable reasoning mode. 
- # Mirror Kimi CLI's with_thinking() behavior exactly — see - # MoonshotAI/kimi-cli packages/kosong/src/kosong/chat_provider/kimi.py + _is_nvidia = "integrate.api.nvidia.com" in self._base_url_lower _is_kimi = ( base_url_host_matches(self.base_url, "api.kimi.com") or base_url_host_matches(self.base_url, "moonshot.ai") or base_url_host_matches(self.base_url, "moonshot.cn") ) - if _is_kimi: - _kimi_thinking_enabled = True - if self.reasoning_config and isinstance(self.reasoning_config, dict): - if self.reasoning_config.get("enabled") is False: - _kimi_thinking_enabled = False - extra_body["thinking"] = { - "type": "enabled" if _kimi_thinking_enabled else "disabled", + + # Temperature: _fixed_temperature_for_model may return OMIT_TEMPERATURE + # sentinel (temperature omitted entirely), a numeric override, or None. + try: + from agent.auxiliary_client import _fixed_temperature_for_model, OMIT_TEMPERATURE + _ft = _fixed_temperature_for_model(self.model, self.base_url) + _omit_temp = _ft is OMIT_TEMPERATURE + _fixed_temp = _ft if not _omit_temp else None + except Exception: + _omit_temp = False + _fixed_temp = None + + # Provider preferences (OpenRouter-specific) + _prefs: Dict[str, Any] = {} + if self.providers_allowed: + _prefs["only"] = self.providers_allowed + if self.providers_ignored: + _prefs["ignore"] = self.providers_ignored + if self.providers_order: + _prefs["order"] = self.providers_order + if self.provider_sort: + _prefs["sort"] = self.provider_sort + if self.provider_require_parameters: + _prefs["require_parameters"] = True + if self.provider_data_collection: + _prefs["data_collection"] = self.provider_data_collection + + # Anthropic max output for Claude on OpenRouter/Nous + _ant_max = None + if (_is_or or _is_nous) and "claude" in (self.model or "").lower(): + try: + from agent.anthropic_adapter import _get_anthropic_max_output + _ant_max = _get_anthropic_max_output(self.model) + except Exception: + pass # fail open — let the proxy pick its default + + # Qwen session metadata precomputed here (promptId is per-call random) + _qwen_meta = None + if _is_qwen: + _qwen_meta = { + "sessionId": self.session_id or "hermes", + "promptId": str(uuid.uuid4()), } - if self._supports_reasoning_extra_body(): - if _is_github_models: - github_reasoning = self._github_models_reasoning_extra_body() - if github_reasoning is not None: - extra_body["reasoning"] = github_reasoning - else: - if self.reasoning_config is not None: - rc = dict(self.reasoning_config) - # Nous Portal requires reasoning enabled — don't send - # enabled=false to it (would cause 400). - if _is_nous and rc.get("enabled") is False: - pass # omit reasoning entirely for Nous when disabled - else: - extra_body["reasoning"] = rc - else: - extra_body["reasoning"] = { - "enabled": True, - "effort": "medium" - } + # Ephemeral max output override — consume immediately so the next + # turn doesn't inherit it. + _ephemeral_out = getattr(self, "_ephemeral_max_output_tokens", None) + if _ephemeral_out is not None: + self._ephemeral_max_output_tokens = None - # Nous Portal product attribution - if _is_nous: - extra_body["tags"] = ["product=hermes-agent"] - - # Ollama num_ctx: override the 2048 default so the model actually - # uses the context window it was trained for. Passed via the OpenAI - # SDK's extra_body → options.num_ctx, which Ollama's OpenAI-compat - # endpoint forwards to the runner as --ctx-size. 
- if self._ollama_num_ctx: - options = extra_body.get("options", {}) - options["num_ctx"] = self._ollama_num_ctx - extra_body["options"] = options - - # Ollama / custom provider: pass think=false when reasoning is disabled. - # Ollama does not recognise the OpenRouter-style `reasoning` extra_body - # field, so we use its native `think` parameter instead. - # This prevents thinking-capable models (Qwen3, etc.) from generating - # blocks and producing empty-response errors when the user has - # set reasoning_effort: none. - if self.provider == "custom" and self.reasoning_config and isinstance(self.reasoning_config, dict): - _effort = (self.reasoning_config.get("effort") or "").strip().lower() - _enabled = self.reasoning_config.get("enabled", True) - if _effort == "none" or _enabled is False: - extra_body["think"] = False - - if self._is_qwen_portal(): - extra_body["vl_high_resolution_images"] = True - - if extra_body: - api_kwargs["extra_body"] = extra_body - - # Priority Processing / generic request overrides (e.g. service_tier). - # Applied last so overrides win over any defaults set above. - if self.request_overrides: - api_kwargs.update(self.request_overrides) - - return api_kwargs + return _ct.build_kwargs( + model=self.model, + messages=api_messages, + tools=self.tools, + timeout=self._resolved_api_call_timeout(), + max_tokens=self.max_tokens, + ephemeral_max_output_tokens=_ephemeral_out, + max_tokens_param_fn=self._max_tokens_param, + reasoning_config=self.reasoning_config, + request_overrides=self.request_overrides, + session_id=getattr(self, "session_id", None), + model_lower=(self.model or "").lower(), + is_openrouter=_is_or, + is_nous=_is_nous, + is_qwen_portal=_is_qwen, + is_github_models=_is_gh, + is_nvidia_nim=_is_nvidia, + is_kimi=_is_kimi, + is_custom_provider=self.provider == "custom", + ollama_num_ctx=self._ollama_num_ctx, + provider_preferences=_prefs or None, + qwen_prepare_fn=self._qwen_prepare_chat_messages if _is_qwen else None, + qwen_prepare_inplace_fn=self._qwen_prepare_chat_messages_inplace if _is_qwen else None, + qwen_session_metadata=_qwen_meta, + fixed_temperature=_fixed_temp, + omit_temperature=_omit_temp, + supports_reasoning=self._supports_reasoning_extra_body(), + github_reasoning_extra=self._github_models_reasoning_extra_body() if _is_gh else None, + anthropic_max_output=_ant_max, + ) def _supports_reasoning_extra_body(self) -> bool: """Return True when reasoning extra_body is safe to send for this route/model. 
@@ -7428,7 +7227,7 @@ class AIAgent: if not _aux_available and self.api_mode == "codex_responses": # No auxiliary client -- use the Codex Responses path directly codex_kwargs = self._build_api_kwargs(api_messages) - codex_kwargs["tools"] = self._responses_tools([memory_tool_def]) + codex_kwargs["tools"] = self._get_codex_transport().convert_tools([memory_tool_def]) if _flush_temperature is not None: codex_kwargs["temperature"] = _flush_temperature else: @@ -7463,9 +7262,15 @@ class AIAgent: # Extract tool calls from the response, handling all API formats tool_calls = [] if self.api_mode == "codex_responses" and not _aux_available: - assistant_msg, _ = self._normalize_codex_response(response) - if assistant_msg and assistant_msg.tool_calls: - tool_calls = assistant_msg.tool_calls + _ct_flush = self._get_codex_transport() + _cnr_flush = _ct_flush.normalize_response(response) + if _cnr_flush and _cnr_flush.tool_calls: + tool_calls = [ + SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) for tc in _cnr_flush.tool_calls + ] elif self.api_mode == "anthropic_messages" and not _aux_available: _tfn = self._get_anthropic_transport() _flush_nr = _tfn.normalize_response(response, strip_tool_prefix=self._is_anthropic_oauth) @@ -7635,8 +7440,27 @@ class AIAgent: finally: self._executing_tools = False + def _dispatch_delegate_task(self, function_args: dict) -> str: + """Single call site for delegate_task dispatch. + + New DELEGATE_TASK_SCHEMA fields only need to be added here to reach all + invocation paths (concurrent, sequential, inline). + """ + from tools.delegate_tool import delegate_task as _delegate_task + return _delegate_task( + goal=function_args.get("goal"), + context=function_args.get("context"), + toolsets=function_args.get("toolsets"), + tasks=function_args.get("tasks"), + max_iterations=function_args.get("max_iterations"), + acp_command=function_args.get("acp_command"), + acp_args=function_args.get("acp_args"), + role=function_args.get("role"), + parent_agent=self, + ) + def _invoke_tool(self, function_name: str, function_args: dict, effective_task_id: str, - tool_call_id: Optional[str] = None) -> str: + tool_call_id: Optional[str] = None, messages: list = None) -> str: """Invoke a single tool and return the result string. No display logic. Handles both agent-level tools (todo, memory, etc.) 
and registry-dispatched @@ -7704,15 +7528,7 @@ class AIAgent: callback=self.clarify_callback, ) elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task - return _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=function_args.get("tasks"), - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + return self._dispatch_delegate_task(function_args) else: return handle_function_call( function_name, function_args, effective_task_id, @@ -7874,7 +7690,7 @@ class AIAgent: pass start = time.time() try: - result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id) + result = self._invoke_tool(function_name, function_args, effective_task_id, tool_call.id, messages=messages) except Exception as tool_error: result = f"Error executing tool '{function_name}': {tool_error}" logger.error("_invoke_tool raised for %s: %s", function_name, tool_error, exc_info=True) @@ -8226,7 +8042,6 @@ class AIAgent: if self._should_emit_quiet_tool_messages(): self._vprint(f" {_get_cute_tool_message_impl('clarify', function_args, tool_duration, result=function_result)}") elif function_name == "delegate_task": - from tools.delegate_tool import delegate_task as _delegate_task tasks_arg = function_args.get("tasks") if tasks_arg and isinstance(tasks_arg, list): spinner_label = f"🔀 delegating {len(tasks_arg)} tasks" @@ -8241,14 +8056,7 @@ class AIAgent: self._delegate_spinner = spinner _delegate_result = None try: - function_result = _delegate_task( - goal=function_args.get("goal"), - context=function_args.get("context"), - toolsets=function_args.get("toolsets"), - tasks=tasks_arg, - max_iterations=function_args.get("max_iterations"), - parent_agent=self, - ) + function_result = self._dispatch_delegate_task(function_args) _delegate_result = function_result finally: self._delegate_spinner = None @@ -8506,8 +8314,9 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) summary_response = self._run_codex_stream(codex_kwargs) - assistant_message, _ = self._normalize_codex_response(summary_response) - final_response = (assistant_message.content or "").strip() if assistant_message else "" + _ct_sum = self._get_codex_transport() + _cnr_sum = _ct_sum.normalize_response(summary_response) + final_response = (_cnr_sum.content or "").strip() else: summary_kwargs = { "model": self.model, @@ -8564,8 +8373,9 @@ class AIAgent: codex_kwargs = self._build_api_kwargs(api_messages) codex_kwargs.pop("tools", None) retry_response = self._run_codex_stream(codex_kwargs) - retry_msg, _ = self._normalize_codex_response(retry_response) - final_response = (retry_msg.content or "").strip() if retry_msg else "" + _ct_retry = self._get_codex_transport() + _cnr_retry = _ct_retry.normalize_response(retry_response) + final_response = (_cnr_retry.content or "").strip() elif self.api_mode == "anthropic_messages": _tretry = self._get_anthropic_transport() _ant_kw2 = _tretry.build_kwargs(model=self.model, messages=api_messages, tools=None, @@ -8676,6 +8486,11 @@ class AIAgent: self._persist_user_message_override = persist_user_message # Generate unique task_id if not provided to isolate VMs between concurrent tasks effective_task_id = task_id or str(uuid.uuid4()) + # Expose the active task_id so tools running mid-turn (e.g. delegate_task + # in delegate_tool.py) can identify this agent for the cross-agent file + # state registry. 
Set BEFORE any tool dispatch so snapshots taken at + # child-launch time see the parent's real id, not None. + self._current_task_id = effective_task_id # Reset retry counters and iteration budget at the start of each turn # so subagent usage from a previous turn doesn't eat into the next one. @@ -9322,7 +9137,7 @@ class AIAgent: if self._force_ascii_payload: _sanitize_structure_non_ascii(api_kwargs) if self.api_mode == "codex_responses": - api_kwargs = self._preflight_codex_api_kwargs(api_kwargs, allow_stream=False) + api_kwargs = self._get_codex_transport().preflight_kwargs(api_kwargs, allow_stream=False) try: from hermes_cli.plugins import invoke_hook as _invoke_hook @@ -9410,38 +9225,34 @@ class AIAgent: response_invalid = False error_details = [] if self.api_mode == "codex_responses": - output_items = getattr(response, "output", None) if response is not None else None - if response is None: - response_invalid = True - error_details.append("response is None") - elif not isinstance(output_items, list): - response_invalid = True - error_details.append("response.output is not a list") - elif not output_items: - # Stream backfill may have failed, but - # _normalize_codex_response can still recover - # from response.output_text. Only mark invalid - # when that fallback is also absent. - _out_text = getattr(response, "output_text", None) - _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" - if _out_text_stripped: - logger.debug( - "Codex response.output is empty but output_text is present " - "(%d chars); deferring to normalization.", - len(_out_text_stripped), - ) - else: - _resp_status = getattr(response, "status", None) - _resp_incomplete = getattr(response, "incomplete_details", None) - logger.warning( - "Codex response.output is empty after stream backfill " - "(status=%s, incomplete_details=%s, model=%s). %s", - _resp_status, _resp_incomplete, - getattr(response, "model", None), - f"api_mode={self.api_mode} provider={self.provider}", - ) + _ct_v = self._get_codex_transport() + if not _ct_v.validate_response(response): + if response is None: response_invalid = True - error_details.append("response.output is empty") + error_details.append("response is None") + else: + # output_text fallback: stream backfill may have failed + # but normalize can still recover from output_text + _out_text = getattr(response, "output_text", None) + _out_text_stripped = _out_text.strip() if isinstance(_out_text, str) else "" + if _out_text_stripped: + logger.debug( + "Codex response.output is empty but output_text is present " + "(%d chars); deferring to normalization.", + len(_out_text_stripped), + ) + else: + _resp_status = getattr(response, "status", None) + _resp_incomplete = getattr(response, "incomplete_details", None) + logger.warning( + "Codex response.output is empty after stream backfill " + "(status=%s, incomplete_details=%s, model=%s). 
%s", + _resp_status, _resp_incomplete, + getattr(response, "model", None), + f"api_mode={self.api_mode} provider={self.provider}", + ) + response_invalid = True + error_details.append("response.output is empty") elif self.api_mode == "anthropic_messages": _tv = self._get_anthropic_transport() if not _tv.validate_response(response): @@ -9450,8 +9261,17 @@ class AIAgent: error_details.append("response is None") else: error_details.append("response.content invalid (not a non-empty list)") + elif self.api_mode == "bedrock_converse": + _btv = self._get_bedrock_transport() + if not _btv.validate_response(response): + response_invalid = True + if response is None: + error_details.append("response is None") + else: + error_details.append("Bedrock response invalid (no output or choices)") else: - if response is None or not hasattr(response, 'choices') or response.choices is None or not response.choices: + _ctv = self._get_chat_completions_transport() + if not _ctv.validate_response(response): response_invalid = True if response is None: error_details.append("response is None") @@ -9612,6 +9432,10 @@ class AIAgent: elif self.api_mode == "anthropic_messages": _tfr = self._get_anthropic_transport() finish_reason = _tfr.map_finish_reason(response.stop_reason) + elif self.api_mode == "bedrock_converse": + # Bedrock response is already normalized at dispatch — finish_reason + # is already in OpenAI format via normalize_converse_response() + finish_reason = response.choices[0].finish_reason if hasattr(response, "choices") and response.choices else "stop" else: finish_reason = response.choices[0].finish_reason assistant_message = response.choices[0].message @@ -10867,7 +10691,40 @@ class AIAgent: try: if self.api_mode == "codex_responses": - assistant_message, finish_reason = self._normalize_codex_response(response) + _ct = self._get_codex_transport() + _cnr = _ct.normalize_response(response) + # Back-compat shim: downstream expects SimpleNamespace with + # codex-specific fields (.codex_reasoning_items, .reasoning_details, + # and .call_id/.response_item_id on tool calls). 
+ _tc_list = None + if _cnr.tool_calls: + _tc_list = [] + for tc in _cnr.tool_calls: + _tc_ns = SimpleNamespace( + id=tc.id, type="function", + function=SimpleNamespace(name=tc.name, arguments=tc.arguments), + ) + if tc.provider_data: + if tc.provider_data.get("call_id"): + _tc_ns.call_id = tc.provider_data["call_id"] + if tc.provider_data.get("response_item_id"): + _tc_ns.response_item_id = tc.provider_data["response_item_id"] + _tc_list.append(_tc_ns) + assistant_message = SimpleNamespace( + content=_cnr.content, + tool_calls=_tc_list or None, + reasoning=_cnr.reasoning, + reasoning_content=None, + codex_reasoning_items=( + _cnr.provider_data.get("codex_reasoning_items") + if _cnr.provider_data else None + ), + reasoning_details=( + _cnr.provider_data.get("reasoning_details") + if _cnr.provider_data else None + ), + ) + finish_reason = _cnr.finish_reason elif self.api_mode == "anthropic_messages": _transport = self._get_anthropic_transport() _nr = _transport.normalize_response( diff --git a/scripts/release.py b/scripts/release.py index f9b247e077..5a82b89b98 100755 --- a/scripts/release.py +++ b/scripts/release.py @@ -45,6 +45,7 @@ AUTHOR_MAP = { "teknium@nousresearch.com": "teknium1", "127238744+teknium1@users.noreply.github.com": "teknium1", # contributors (from noreply pattern) + "wangqiang@wangqiangdeMac-mini.local": "xiaoqiang243", "snreynolds2506@gmail.com": "snreynolds", "35742124+0xbyt4@users.noreply.github.com": "0xbyt4", "71184274+MassiveMassimo@users.noreply.github.com": "MassiveMassimo", @@ -98,6 +99,7 @@ AUTHOR_MAP = { "mygamez@163.com": "zhongyueming1121", "hansnow@users.noreply.github.com": "hansnow", "134848055+UNLINEARITY@users.noreply.github.com": "UNLINEARITY", + "ben.burtenshaw@gmail.com": "burtenshaw", # contributors (manual mapping from git names) "ahmedsherif95@gmail.com": "asheriif", "liujinkun@bytedance.com": "liujinkun2025", @@ -129,6 +131,7 @@ AUTHOR_MAP = { "brooklyn.bb.nicholson@gmail.com": "brooklynnicholson", "withapurpose37@gmail.com": "StefanIsMe", "4317663+helix4u@users.noreply.github.com": "helix4u", + "ifkellx@users.noreply.github.com": "Ifkellx", "331214+counterposition@users.noreply.github.com": "counterposition", "blspear@gmail.com": "BrennerSpear", "akhater@gmail.com": "akhater", @@ -332,6 +335,7 @@ AUTHOR_MAP = { "asslaenn5@gmail.com": "Aslaaen", "shalompmc0505@naver.com": "pinion05", "105142614+VTRiot@users.noreply.github.com": "VTRiot", + "vivien000812@gmail.com": "iamagenius00", } diff --git a/skills/creative/baoyu-comic/PORT_NOTES.md b/skills/creative/baoyu-comic/PORT_NOTES.md new file mode 100644 index 0000000000..637b7befb5 --- /dev/null +++ b/skills/creative/baoyu-comic/PORT_NOTES.md @@ -0,0 +1,77 @@ +# Port Notes — baoyu-comic + +Ported from [JimLiu/baoyu-skills](https://github.com/JimLiu/baoyu-skills) v1.56.1. 
+
+## Changes from upstream
+
+### SKILL.md adaptations
+
+| Change | Upstream | Hermes |
+|--------|----------|--------|
+| Metadata namespace | `openclaw` | `hermes` (with `tags` + `homepage`) |
+| Trigger | Slash commands / CLI flags | Natural language skill matching |
+| User config | EXTEND.md file (project/user/XDG paths) | Removed — not part of Hermes infra |
+| User prompts | `AskUserQuestion` (batched) | `clarify` tool (one question at a time) |
+| Image generation | baoyu-imagine (Bun/TypeScript, supports `--ref`) | `image_generate` — **prompt-only**, returns a URL; no reference image input; agent must download the URL to the output directory |
+| PDF assembly | `scripts/merge-to-pdf.ts` (Bun + `pdf-lib`) | Removed — the PDF merge step is out of scope for this port; pages are delivered as PNGs only |
+| Platform support | Linux/macOS/Windows/WSL/PowerShell | Linux/macOS only |
+| File operations | Generic instructions | Hermes file tools (`write_file`, `read_file`) |
+
+### Structural removals
+
+- **`references/config/` directory** (removed entirely):
+  - `first-time-setup.md` — blocking first-time setup flow for EXTEND.md
+  - `preferences-schema.md` — EXTEND.md YAML schema
+  - `watermark-guide.md` — watermark config (tied to EXTEND.md)
+- **`scripts/` directory** (removed entirely): upstream's `merge-to-pdf.ts` depended on `pdf-lib`, which is not declared anywhere in the Hermes repo. Rather than add a new dependency, the port drops PDF assembly and delivers per-page PNGs.
+- **Workflow Step 8 (Merge to PDF)** removed from `workflow.md`; Step 9 (Completion report) renumbered to Step 8.
+- **Workflow Step 1.1** — "Load Preferences (EXTEND.md)" section removed from `workflow.md`; steps 1.2/1.3 renumbered to 1.1/1.2.
+- **Generic "User Input Tools" and "Image Generation Tools" preambles** — SKILL.md no longer lists fallback rules for multiple possible tools; it references `clarify` and `image_generate` directly.
+
+### Image generation strategy changes
+
+`image_generate`'s schema accepts only `prompt` and `aspect_ratio` (`landscape` | `portrait` | `square`). Upstream's reference-image flow (`--ref characters.png` for character consistency, plus user-supplied refs for style/palette/scene) does not map to this tool, so the workflow was restructured:
+
+- **Character sheet PNG** is still generated for multi-page comics, but it is repositioned as a **human-facing review artifact** (for visual verification) and a reference for later regenerations / manual prompt edits. Page prompts themselves are built from the **text descriptions** in `characters/characters.md` (embedded inline during Step 5). `image_generate` never sees the PNG as a visual input.
+- **User-supplied reference images** are reduced to `style` / `palette` / `scene` trait extraction — traits are embedded in the prompt body; the image files themselves are kept only for provenance under `refs/`.
+- **Page prompts** now mandate that character descriptions are embedded inline (copied from `characters/characters.md`) — this is the only mechanism left to enforce cross-page character consistency.
+- **Download step** — after every `image_generate` call, the returned URL is fetched to disk (e.g., `curl -fsSL "<url>" -o <page>.png`, with the placeholders filled in per page) and verified before the workflow advances.
+
+### SKILL.md reductions
+
+- CLI option columns (`--art`, `--tone`, `--layout`, `--aspect`, `--lang`, `--ref`, `--storyboard-only`, `--prompts-only`, `--images-only`, `--regenerate`) converted to plain-English option descriptions.
+- Preset files (`presets/*.md`) and `ohmsha-guide.md`: `` `--style X` `` / `` `--art X --tone Y` `` shorthand rewritten to `art=X, tone=Y` + natural-language references. +- `partial-workflows.md`: per-skill slash command invocations rewritten as user-intent cues; PDF-related outputs removed. +- `auto-selection.md`: priority order dropped the EXTEND.md tier. +- `analysis-framework.md`: language-priority comment updated (user option → conversation → source). + +### File naming convention + +Source content pasted by the user is saved as `source-{slug}.md`, where `{slug}` is the kebab-case topic slug used for the output directory. Backups follow the same pattern with a `-backup-YYYYMMDD-HHMMSS` suffix. SKILL.md and `workflow.md` now agree on this single convention. + +### What was preserved verbatim + +- All 6 art-style definitions (`references/art-styles/`) +- All 7 tone definitions (`references/tones/`) +- All 7 layout definitions (`references/layouts/`) +- Core templates: `character-template.md`, `storyboard-template.md`, `base-prompt.md` +- Preset bodies (only the first few intro lines adapted; special rules unchanged) +- Author, version, homepage attribution + +## Syncing with upstream + +To pull upstream updates: + +```bash +# Compare versions +curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/SKILL.md | head -5 +# Look for the version: line + +# Diff a reference file +diff <(curl -sL https://raw.githubusercontent.com/JimLiu/baoyu-skills/main/skills/baoyu-comic/references/art-styles/manga.md) \ + references/art-styles/manga.md +``` + +Art-style, tone, and layout reference files can usually be overwritten directly (they're upstream-verbatim). `SKILL.md`, `references/workflow.md`, `references/partial-workflows.md`, `references/auto-selection.md`, `references/analysis-framework.md`, `references/ohmsha-guide.md`, and `references/presets/*.md` must be manually merged since they contain Hermes-specific adaptations. + +If upstream adds a Hermes-compatible PDF merge step (no extra npm deps), restore `scripts/` and reintroduce Step 8 in `workflow.md`. diff --git a/skills/creative/baoyu-comic/SKILL.md b/skills/creative/baoyu-comic/SKILL.md new file mode 100644 index 0000000000..d3c89ed4c7 --- /dev/null +++ b/skills/creative/baoyu-comic/SKILL.md @@ -0,0 +1,246 @@ +--- +name: baoyu-comic +description: Knowledge comic creator supporting multiple art styles and tones. Creates original educational comics with detailed panel layouts and sequential image generation. Use when user asks to create "知识漫画", "教育漫画", "biography comic", "tutorial comic", or "Logicomix-style comic". +version: 1.56.1 +author: 宝玉 (JimLiu) +license: MIT +metadata: + hermes: + tags: [comic, knowledge-comic, creative, image-generation] + homepage: https://github.com/JimLiu/baoyu-skills#baoyu-comic +--- + +# Knowledge Comic Creator + +Adapted from [baoyu-comic](https://github.com/JimLiu/baoyu-skills) for Hermes Agent's tool ecosystem. + +Create original knowledge comics with flexible art style × tone combinations. + +## When to Use + +Trigger this skill when the user asks to create a knowledge/educational comic, biography comic, tutorial comic, or uses terms like "知识漫画", "教育漫画", or "Logicomix-style". The user provides content (text, file path, URL, or topic) and optionally specifies art style, tone, layout, aspect ratio, or language. + +## Reference Images + +Hermes' `image_generate` tool is **prompt-only** — it accepts a text prompt and an aspect ratio, and returns an image URL. 
It does **NOT** accept reference images. When the user supplies a reference image, use it to **extract traits in text** that get embedded in every page prompt: + +**Intake**: Accept file paths when the user provides them (or pastes images in conversation). +- File path(s) → copy to `refs/NN-ref-{slug}.{ext}` alongside the comic output for provenance +- Pasted image with no path → ask the user for the path via `clarify`, or extract style traits verbally as a text fallback +- No reference → skip this section + +**Usage modes** (per reference): + +| Usage | Effect | +|-------|--------| +| `style` | Extract style traits (line treatment, texture, mood) and append to every page's prompt body | +| `palette` | Extract hex colors and append to every page's prompt body | +| `scene` | Extract scene composition or subject notes and append to the relevant page(s) | + +**Record in each page's prompt frontmatter** when refs exist: + +```yaml +references: + - ref_id: 01 + filename: 01-ref-scene.png + usage: style + traits: "muted earth tones, soft-edged ink wash, low-contrast backgrounds" +``` + +Character consistency is driven by **text descriptions** in `characters/characters.md` (written in Step 3) that get embedded inline in every page prompt (Step 5). The optional PNG character sheet generated in Step 7.1 is a human-facing review artifact, not an input to `image_generate`. + +## Options + +### Visual Dimensions + +| Option | Values | Description | +|--------|--------|-------------| +| Art | ligne-claire (default), manga, realistic, ink-brush, chalk, minimalist | Art style / rendering technique | +| Tone | neutral (default), warm, dramatic, romantic, energetic, vintage, action | Mood / atmosphere | +| Layout | standard (default), cinematic, dense, splash, mixed, webtoon, four-panel | Panel arrangement | +| Aspect | 3:4 (default, portrait), 4:3 (landscape), 16:9 (widescreen) | Page aspect ratio | +| Language | auto (default), zh, en, ja, etc. | Output language | +| Refs | File paths | Reference images used for style / palette trait extraction (not passed to the image model). See [Reference Images](#reference-images) above. | + +### Partial Workflow Options + +| Option | Description | +|--------|-------------| +| Storyboard only | Generate storyboard only, skip prompts and images | +| Prompts only | Generate storyboard + prompts, skip images | +| Images only | Generate images from existing prompts directory | +| Regenerate N | Regenerate specific page(s) only (e.g., `3` or `2,5,8`) | + +Details: [references/partial-workflows.md](references/partial-workflows.md) + +### Art, Tone & Preset Catalogue + +- **Art styles** (6): `ligne-claire`, `manga`, `realistic`, `ink-brush`, `chalk`, `minimalist`. Full definitions at `references/art-styles/