feat(custom): prompt and persist explicit api_mode for custom providers

Adds an explicit API compatibility mode prompt to the `hermes model -> custom` flow so Codex-compatible third-party endpoints (and any other non-default backend whose URL doesn't match the existing heuristics in `_detect_api_mode_for_url`) can be selected explicitly instead of silently falling back to chat_completions. Choices: Auto-detect / chat_completions / codex_responses / anthropic_messages. Persists `api_mode` to: - `model.api_mode` (active session config) - the matching `custom_providers[*]` entry (so re-activating the named provider next time replays the same transport) Salvaged from PR #6125 onto current main: kept the new prompt and the `_save_custom_provider(api_mode=...)` plumbing; the named-custom flow already extracts and applies `api_mode` from the saved entry on current main so those changes are preserved as-is. Test fixtures updated for the new prompt and the existing display-name prompt. Co-authored-by: littlewwwhite <1095245867@qq.com>
2026-05-18 04:41:56 +00:00 · 2026-05-13 08:46:01 -07:00 · 2026-05-13 08:46:01 -07:00 · 6f2d1c88b7
commit 6f2d1c88b7
parent 1979ef5802
4 changed files with 200 additions and 7 deletions
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@ -3079,6 +3079,21 @@ def _model_flow_custom(config):
            else:
                print(f"  If /v1 should not be in the base URL, try: {suggested}")

+    # Prompt for API compatibility mode explicitly so codex-compatible custom
+    # providers don't silently fall back to chat_completions.
+    current_model_cfg = config.get("model")
+    current_api_mode = ""
+    if isinstance(current_model_cfg, dict):
+        current_api_mode = str(current_model_cfg.get("api_mode") or "").strip()
+    api_mode = _prompt_custom_api_mode_selection(
+        effective_url,
+        current_api_mode=current_api_mode,
+    )
+    if api_mode:
+        print(f"  API mode: {api_mode}")
+    else:
+        print("  API mode: auto-detect")
+
    # Select model — use probe results when available, fall back to manual input
    model_name = ""
    detected_models = probe.get("models") or []
@ -3142,7 +3157,10 @@ def _model_flow_custom(config):
        model["base_url"] = effective_url
        if effective_key:
            model["api_key"] = effective_key
-        model.pop("api_mode", None)  # let runtime auto-detect from URL
+        if api_mode:
+            model["api_mode"] = api_mode
+        else:
+            model.pop("api_mode", None)
        save_config(cfg)
        deactivate_provider()

@ -3165,7 +3183,10 @@ def _model_flow_custom(config):
        _caller_model["base_url"] = effective_url
        if effective_key:
            _caller_model["api_key"] = effective_key
-        _caller_model.pop("api_mode", None)
+        if api_mode:
+            _caller_model["api_mode"] = api_mode
+        else:
+            _caller_model.pop("api_mode", None)
        config["model"] = _caller_model
        print("Endpoint saved. Use `/model` in chat or `hermes model` to set a model.")

@ -3176,9 +3197,80 @@ def _model_flow_custom(config):
        model_name or "",
        context_length=context_length,
        name=display_name,
+        api_mode=api_mode,
    )


+def _prompt_custom_api_mode_selection(base_url: str, current_api_mode: str = "") -> Optional[str]:
+    """Prompt for a custom provider API mode.
+
+    Returns an explicit mode string, or None to keep auto-detect behavior.
+    """
+    from hermes_cli.runtime_provider import _detect_api_mode_for_url
+
+    detected_mode = _detect_api_mode_for_url(base_url)
+    normalized_current = str(current_api_mode or "").strip().lower()
+    default_mode = normalized_current or detected_mode or ""
+
+    mode_options = [
+        (
+            "",
+            "Auto-detect",
+            "Use Hermes URL heuristics; best for standard OpenAI-compatible endpoints.",
+        ),
+        (
+            "chat_completions",
+            "Chat Completions",
+            "Use /chat/completions for standard OpenAI-compatible servers.",
+        ),
+        (
+            "codex_responses",
+            "Responses / Codex",
+            "Use /responses for Codex-compatible tool-calling backends.",
+        ),
+        (
+            "anthropic_messages",
+            "Anthropic Messages",
+            "Use /v1/messages for Anthropic-compatible endpoints.",
+        ),
+    ]
+
+    print()
+    print("Select API compatibility mode:")
+    for idx, (value, label, description) in enumerate(mode_options, 1):
+        markers = []
+        if value == detected_mode:
+            markers.append("detected")
+        if value == default_mode:
+            markers.append("current")
+        suffix = f" [{' / '.join(markers)}]" if markers else ""
+        print(f"  {idx}. {label}{suffix}")
+        print(f"     {description}")
+
+    try:
+        raw = input(
+            "Choice [1-4, Enter to keep current/detected]: "
+        ).strip().lower()
+    except (KeyboardInterrupt, EOFError):
+        print("\nCancelled.")
+        raise
+
+    if not raw:
+        return default_mode or None
+
+    if raw in {"1", "auto", "detect", "auto-detect"}:
+        return None
+    if raw in {"2", "chat", "chat_completions", "completions"}:
+        return "chat_completions"
+    if raw in {"3", "responses", "codex", "codex_responses"}:
+        return "codex_responses"
+    if raw in {"4", "anthropic", "anthropic_messages", "messages"}:
+        return "anthropic_messages"
+
+    print(f"Invalid API mode choice: {raw}. Falling back to auto-detect.")
+    return None
+
+
 def _auto_provider_name(base_url: str) -> str:
    """Generate a display name from a custom endpoint URL.

@ -3214,12 +3306,12 @@ def _custom_provider_api_key_config_value(provider_info, resolved_api_key=""):


 def _save_custom_provider(
-    base_url, api_key="", model="", context_length=None, name=None
+    base_url, api_key="", model="", context_length=None, name=None, api_mode=None
 ):
    """Save a custom endpoint to custom_providers in config.yaml.

    Deduplicates by base_url — if the URL already exists, updates the
-    model name and context_length but doesn't add a duplicate entry.
+    model name, context_length, and api_mode but doesn't add a duplicate entry.
    Uses *name* when provided, otherwise auto-generates from the URL.
    """
    from hermes_cli.config import load_config, save_config
@ -3245,6 +3337,13 @@ def _save_custom_provider(
                models_cfg[model] = {"context_length": context_length}
                entry["models"] = models_cfg
                changed = True
+            if api_mode:
+                if entry.get("api_mode") != api_mode:
+                    entry["api_mode"] = api_mode
+                    changed = True
+            elif "api_mode" in entry:
+                entry.pop("api_mode", None)
+                changed = True
            if changed:
                cfg["custom_providers"] = providers
                save_config(cfg)
@ -3259,6 +3358,8 @@ def _save_custom_provider(
        entry["api_key"] = api_key
    if model:
        entry["model"] = model
+    if api_mode:
+        entry["api_mode"] = api_mode
    if model and context_length:
        entry["models"] = {model: {"context_length": context_length}}

@ -3712,7 +3813,7 @@ def _model_flow_named_custom(config, provider_info):
                save_config(cfg)
    else:
        # Save model name to the custom_providers entry for next time
-        _save_custom_provider(base_url, config_api_key, model_name)
+        _save_custom_provider(base_url, config_api_key, model_name, api_mode=api_mode)

    print(f"\n✅ Model set to: {model_name}")
    print(f"   Provider: {name} ({base_url})")