mirror of
https://github.com/NousResearch/hermes-agent.git
synced 2026-04-25 00:51:20 +00:00
- Add openai/openai-codex -> openai mapping to PROVIDER_TO_MODELS_DEV so context-length lookups use models.dev data instead of 128k fallback. Fixes #8161. - Set api_mode from custom_providers entry when switching via hermes model, and clear stale api_mode when the entry has none. Also extract api_mode in _named_custom_provider_map(). Fixes #8181. - Convert OpenAI image_url content blocks to Anthropic image blocks when the endpoint is Anthropic-compatible (MiniMax, MiniMax-CN, or any URL containing /anthropic). Fixes #8147.
This commit is contained in:
parent
b1f13a8c5f
commit
078dba015d
6 changed files with 214 additions and 1 deletions
|
|
@ -2112,6 +2112,75 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float
|
|||
return default
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Anthropic-compatible endpoint detection + image block conversion
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
|
||||
# Their image content blocks must use Anthropic format, not OpenAI format.
|
||||
_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
|
||||
|
||||
|
||||
def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
|
||||
"""Detect if an endpoint expects Anthropic-format content blocks.
|
||||
|
||||
Returns True for known Anthropic-compatible providers (MiniMax) and
|
||||
any endpoint whose URL contains ``/anthropic`` in the path.
|
||||
"""
|
||||
if provider in _ANTHROPIC_COMPAT_PROVIDERS:
|
||||
return True
|
||||
url_lower = (base_url or "").lower()
|
||||
return "/anthropic" in url_lower
|
||||
|
||||
|
||||
def _convert_openai_images_to_anthropic(messages: list) -> list:
|
||||
"""Convert OpenAI ``image_url`` content blocks to Anthropic ``image`` blocks.
|
||||
|
||||
Only touches messages that have list-type content with ``image_url`` blocks;
|
||||
plain text messages pass through unchanged.
|
||||
"""
|
||||
converted = []
|
||||
for msg in messages:
|
||||
content = msg.get("content")
|
||||
if not isinstance(content, list):
|
||||
converted.append(msg)
|
||||
continue
|
||||
new_content = []
|
||||
changed = False
|
||||
for block in content:
|
||||
if block.get("type") == "image_url":
|
||||
image_url_val = (block.get("image_url") or {}).get("url", "")
|
||||
if image_url_val.startswith("data:"):
|
||||
# Parse data URI: data:<media_type>;base64,<data>
|
||||
header, _, b64data = image_url_val.partition(",")
|
||||
media_type = "image/png"
|
||||
if ":" in header and ";" in header:
|
||||
media_type = header.split(":", 1)[1].split(";", 1)[0]
|
||||
new_content.append({
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "base64",
|
||||
"media_type": media_type,
|
||||
"data": b64data,
|
||||
},
|
||||
})
|
||||
else:
|
||||
# URL-based image
|
||||
new_content.append({
|
||||
"type": "image",
|
||||
"source": {
|
||||
"type": "url",
|
||||
"url": image_url_val,
|
||||
},
|
||||
})
|
||||
changed = True
|
||||
else:
|
||||
new_content.append(block)
|
||||
converted.append({**msg, "content": new_content} if changed else msg)
|
||||
return converted
|
||||
|
||||
|
||||
|
||||
def _build_call_kwargs(
|
||||
provider: str,
|
||||
model: str,
|
||||
|
|
@ -2304,6 +2373,11 @@ def call_llm(
|
|||
tools=tools, timeout=effective_timeout, extra_body=extra_body,
|
||||
base_url=resolved_base_url)
|
||||
|
||||
# Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
|
||||
_client_base = str(getattr(client, "base_url", "") or "")
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
# Handle max_tokens vs max_completion_tokens retry, then payment fallback.
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
|
|
@ -2492,6 +2566,11 @@ async def async_call_llm(
|
|||
tools=tools, timeout=effective_timeout, extra_body=extra_body,
|
||||
base_url=resolved_base_url)
|
||||
|
||||
# Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
|
||||
_client_base = str(getattr(client, "base_url", "") or "")
|
||||
if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
|
||||
kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
|
||||
|
||||
try:
|
||||
return _validate_llm_response(
|
||||
await client.chat.completions.create(**kwargs), task)
|
||||
|
|
|
|||
|
|
@ -144,6 +144,8 @@ class ProviderInfo:
|
|||
PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
|
||||
"openrouter": "openrouter",
|
||||
"anthropic": "anthropic",
|
||||
"openai": "openai",
|
||||
"openai-codex": "openai",
|
||||
"zai": "zai",
|
||||
"kimi-coding": "kimi-for-coding",
|
||||
"minimax": "minimax",
|
||||
|
|
|
|||
|
|
@ -1107,6 +1107,7 @@ def select_provider_and_model(args=None):
|
|||
"base_url": base_url,
|
||||
"api_key": entry.get("api_key", ""),
|
||||
"model": entry.get("model", ""),
|
||||
"api_mode": entry.get("api_mode", ""),
|
||||
}
|
||||
return custom_provider_map
|
||||
|
||||
|
|
@ -1955,6 +1956,12 @@ def _model_flow_named_custom(config, provider_info):
|
|||
model["base_url"] = base_url
|
||||
if api_key:
|
||||
model["api_key"] = api_key
|
||||
# Apply api_mode from custom_providers entry, or clear stale value
|
||||
custom_api_mode = provider_info.get("api_mode", "")
|
||||
if custom_api_mode:
|
||||
model["api_mode"] = custom_api_mode
|
||||
else:
|
||||
model.pop("api_mode", None) # let runtime auto-detect from URL
|
||||
save_config(cfg)
|
||||
deactivate_provider()
|
||||
|
||||
|
|
|
|||
|
|
@ -1628,3 +1628,74 @@ class TestStaleBaseUrlWarning:
|
|||
|
||||
assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
|
||||
"Warning should not fire a second time"
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
# Anthropic-compatible image block conversion
# ---------------------------------------------------------------------------

class TestAnthropicCompatImageConversion:
    """Tests for _is_anthropic_compat_endpoint and _convert_openai_images_to_anthropic."""

    def test_known_providers_detected(self):
        from agent.auxiliary_client import _is_anthropic_compat_endpoint
        for known in ("minimax", "minimax-cn"):
            assert _is_anthropic_compat_endpoint(known, "")

    def test_openrouter_not_detected(self):
        from agent.auxiliary_client import _is_anthropic_compat_endpoint
        for other in ("openrouter", "anthropic"):
            assert not _is_anthropic_compat_endpoint(other, "")

    def test_url_based_detection(self):
        from agent.auxiliary_client import _is_anthropic_compat_endpoint
        matching_urls = (
            "https://api.minimax.io/anthropic",
            "https://example.com/anthropic/v1",
        )
        for url in matching_urls:
            assert _is_anthropic_compat_endpoint("custom", url)
        assert not _is_anthropic_compat_endpoint("custom", "https://api.openai.com/v1")

    def test_base64_image_converted(self):
        from agent.auxiliary_client import _convert_openai_images_to_anthropic
        payload = [{
            "role": "user",
            "content": [
                {"type": "text", "text": "describe"},
                {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR="}}
            ]
        }]
        out = _convert_openai_images_to_anthropic(payload)
        converted = out[0]["content"][1]
        assert converted["type"] == "image"
        source = converted["source"]
        assert source["type"] == "base64"
        assert source["media_type"] == "image/png"
        assert source["data"] == "iVBOR="

    def test_url_image_converted(self):
        from agent.auxiliary_client import _convert_openai_images_to_anthropic
        payload = [{
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}}
            ]
        }]
        out = _convert_openai_images_to_anthropic(payload)
        converted = out[0]["content"][0]
        assert converted["type"] == "image"
        assert converted["source"]["type"] == "url"
        assert converted["source"]["url"] == "https://example.com/img.jpg"

    def test_text_only_messages_unchanged(self):
        from agent.auxiliary_client import _convert_openai_images_to_anthropic
        original = [{"role": "user", "content": "Hello"}]
        out = _convert_openai_images_to_anthropic(original)
        assert out[0] is original[0]  # same object, not copied

    def test_jpeg_media_type_parsed(self):
        from agent.auxiliary_client import _convert_openai_images_to_anthropic
        payload = [{
            "role": "user",
            "content": [
                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/="}}
            ]
        }]
        out = _convert_openai_images_to_anthropic(payload)
        assert out[0]["content"][0]["source"]["media_type"] == "image/jpeg"
|
||||
|
|
|
|||
|
|
@ -87,7 +87,10 @@ class TestProviderMapping:
|
|||
|
||||
def test_unmapped_provider_not_in_dict(self):
|
||||
assert "nous" not in PROVIDER_TO_MODELS_DEV
|
||||
assert "openai-codex" not in PROVIDER_TO_MODELS_DEV
|
||||
|
||||
def test_openai_codex_mapped_to_openai(self):
|
||||
assert PROVIDER_TO_MODELS_DEV["openai"] == "openai"
|
||||
assert PROVIDER_TO_MODELS_DEV["openai-codex"] == "openai"
|
||||
|
||||
|
||||
class TestExtractContext:
|
||||
|
|
|
|||
|
|
@ -122,3 +122,54 @@ class TestCustomProviderModelSwitch:
|
|||
model = config.get("model")
|
||||
assert isinstance(model, dict)
|
||||
assert model["default"] == "model-X"
|
||||
|
||||
    def test_api_mode_set_from_provider_info(self, config_home):
        """When custom_providers entry has api_mode, it should be applied."""
        import yaml
        from hermes_cli.main import _model_flow_named_custom

        # Entry shaped like what _named_custom_provider_map() produces,
        # including the api_mode field for an Anthropic-style proxy.
        provider_info = {
            "name": "Anthropic Proxy",
            "base_url": "https://proxy.example.com/anthropic",
            "api_key": "***",
            "model": "claude-3",
            "api_mode": "anthropic_messages",
        }

        # Stub the network model listing, disable the TUI menu (forcing the
        # plain input() path), and auto-pick the first (only) model.
        with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]), \
                patch.dict("sys.modules", {"simple_term_menu": None}), \
                patch("builtins.input", return_value="1"), \
                patch("builtins.print"):
            _model_flow_named_custom({}, provider_info)

        # The flow persists its result; verify api_mode landed in config.yaml.
        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
        model = config.get("model")
        assert isinstance(model, dict)
        assert model.get("api_mode") == "anthropic_messages"
|
||||
|
||||
    def test_api_mode_cleared_when_not_specified(self, config_home):
        """When custom_providers entry has no api_mode, stale api_mode is removed."""
        import yaml
        from hermes_cli.main import _model_flow_named_custom

        # Pre-seed a stale api_mode in config so the flow has something to clear.
        config_path = config_home / "config.yaml"
        config_path.write_text(yaml.dump({"model": {"api_mode": "anthropic_messages"}}))

        # Plain OpenAI-compatible entry: deliberately carries no "api_mode" key.
        provider_info = {
            "name": "My vLLM",
            "base_url": "https://vllm.example.com/v1",
            "api_key": "***",
            "model": "llama-3",
        }

        # Stub the network model listing, disable the TUI menu (forcing the
        # plain input() path), and auto-pick the first (only) model.
        with patch("hermes_cli.models.fetch_api_models", return_value=["llama-3"]), \
                patch.dict("sys.modules", {"simple_term_menu": None}), \
                patch("builtins.input", return_value="1"), \
                patch("builtins.print"):
            _model_flow_named_custom({}, provider_info)

        # Reload the persisted config: the stale key must be gone entirely,
        # letting the runtime auto-detect the mode from the URL instead.
        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
        model = config.get("model")
        assert isinstance(model, dict)
        assert "api_mode" not in model, "Stale api_mode should be removed"
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue