diff --git a/agent/auxiliary_client.py b/agent/auxiliary_client.py
index a4c4df8e9..3dcc78a98 100644
--- a/agent/auxiliary_client.py
+++ b/agent/auxiliary_client.py
@@ -2112,6 +2112,75 @@ def _get_task_timeout(task: str, default: float = _DEFAULT_AUX_TIMEOUT) -> float
     return default
 
 
+# ---------------------------------------------------------------------------
+# Anthropic-compatible endpoint detection + image block conversion
+# ---------------------------------------------------------------------------
+
+# Providers that use Anthropic-compatible endpoints (via OpenAI SDK wrapper).
+# Their image content blocks must use Anthropic format, not OpenAI format.
+_ANTHROPIC_COMPAT_PROVIDERS = frozenset({"minimax", "minimax-cn"})
+
+
+def _is_anthropic_compat_endpoint(provider: str, base_url: str) -> bool:
+    """Detect if an endpoint expects Anthropic-format content blocks.
+
+    Returns True for known Anthropic-compatible providers (MiniMax) and
+    any endpoint whose URL contains ``/anthropic`` in the path.
+    """
+    if provider in _ANTHROPIC_COMPAT_PROVIDERS:
+        return True
+    url_lower = (base_url or "").lower()
+    return "/anthropic" in url_lower
+
+
+def _convert_openai_images_to_anthropic(messages: list) -> list:
+    """Convert OpenAI ``image_url`` content blocks to Anthropic ``image`` blocks.
+
+    Only touches messages that have list-type content with ``image_url`` blocks;
+    plain text messages pass through unchanged.
+    """
+    converted = []
+    for msg in messages:
+        content = msg.get("content")
+        if not isinstance(content, list):
+            converted.append(msg)
+            continue
+        new_content = []
+        changed = False
+        for block in content:
+            if block.get("type") == "image_url":
+                image_url_val = (block.get("image_url") or {}).get("url", "")
+                if image_url_val.startswith("data:"):
+                    # Parse data URI: data:<media_type>;base64,<data>
+                    header, _, b64data = image_url_val.partition(",")
+                    media_type = "image/png"
+                    if ":" in header and ";" in header:
+                        media_type = header.split(":", 1)[1].split(";", 1)[0]
+                    new_content.append({
+                        "type": "image",
+                        "source": {
+                            "type": "base64",
+                            "media_type": media_type,
+                            "data": b64data,
+                        },
+                    })
+                else:
+                    # URL-based image
+                    new_content.append({
+                        "type": "image",
+                        "source": {
+                            "type": "url",
+                            "url": image_url_val,
+                        },
+                    })
+                changed = True
+            else:
+                new_content.append(block)
+        converted.append({**msg, "content": new_content} if changed else msg)
+    return converted
+
+
+
 def _build_call_kwargs(
     provider: str,
     model: str,
@@ -2304,6 +2373,11 @@ def call_llm(
         tools=tools, timeout=effective_timeout, extra_body=extra_body,
         base_url=resolved_base_url)
 
+    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
+    _client_base = str(getattr(client, "base_url", "") or "")
+    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
+        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
+
     # Handle max_tokens vs max_completion_tokens retry, then payment fallback.
     try:
         return _validate_llm_response(
@@ -2492,6 +2566,11 @@ async def async_call_llm(
         tools=tools, timeout=effective_timeout, extra_body=extra_body,
         base_url=resolved_base_url)
 
+    # Convert image blocks for Anthropic-compatible endpoints (e.g. MiniMax)
+    _client_base = str(getattr(client, "base_url", "") or "")
+    if _is_anthropic_compat_endpoint(resolved_provider, _client_base):
+        kwargs["messages"] = _convert_openai_images_to_anthropic(kwargs["messages"])
+
     try:
         return _validate_llm_response(
             await client.chat.completions.create(**kwargs), task)
diff --git a/agent/models_dev.py b/agent/models_dev.py
index f9eb49dbf..e20a2d414 100644
--- a/agent/models_dev.py
+++ b/agent/models_dev.py
@@ -144,6 +144,8 @@ class ProviderInfo:
 PROVIDER_TO_MODELS_DEV: Dict[str, str] = {
     "openrouter": "openrouter",
     "anthropic": "anthropic",
+    "openai": "openai",
+    "openai-codex": "openai",
     "zai": "zai",
     "kimi-coding": "kimi-for-coding",
     "minimax": "minimax",
diff --git a/hermes_cli/main.py b/hermes_cli/main.py
index 9f73c07a5..2e580bea8 100644
--- a/hermes_cli/main.py
+++ b/hermes_cli/main.py
@@ -1107,6 +1107,7 @@ def select_provider_and_model(args=None):
             "base_url": base_url,
             "api_key": entry.get("api_key", ""),
             "model": entry.get("model", ""),
+            "api_mode": entry.get("api_mode", ""),
         }
     return custom_provider_map
 
@@ -1955,6 +1956,12 @@ def _model_flow_named_custom(config, provider_info):
         model["base_url"] = base_url
     if api_key:
         model["api_key"] = api_key
+    # Apply api_mode from custom_providers entry, or clear stale value
+    custom_api_mode = provider_info.get("api_mode", "")
+    if custom_api_mode:
+        model["api_mode"] = custom_api_mode
+    else:
+        model.pop("api_mode", None)  # let runtime auto-detect from URL
     save_config(cfg)
 
     deactivate_provider()
diff --git a/tests/agent/test_auxiliary_client.py b/tests/agent/test_auxiliary_client.py
index e1164ace8..77004c4e1 100644
--- a/tests/agent/test_auxiliary_client.py
+++ b/tests/agent/test_auxiliary_client.py
@@ -1628,3 +1628,74 @@ class TestStaleBaseUrlWarning:
 
         assert not any("OPENAI_BASE_URL is set" in rec.message for rec in caplog.records), \
             "Warning should not fire a second time"
+
+
+# ---------------------------------------------------------------------------
+# Anthropic-compatible image block conversion
+# ---------------------------------------------------------------------------
+
+class TestAnthropicCompatImageConversion:
+    """Tests for _is_anthropic_compat_endpoint and _convert_openai_images_to_anthropic."""
+
+    def test_known_providers_detected(self):
+        from agent.auxiliary_client import _is_anthropic_compat_endpoint
+        assert _is_anthropic_compat_endpoint("minimax", "")
+        assert _is_anthropic_compat_endpoint("minimax-cn", "")
+
+    def test_openrouter_not_detected(self):
+        from agent.auxiliary_client import _is_anthropic_compat_endpoint
+        assert not _is_anthropic_compat_endpoint("openrouter", "")
+        assert not _is_anthropic_compat_endpoint("anthropic", "")
+
+    def test_url_based_detection(self):
+        from agent.auxiliary_client import _is_anthropic_compat_endpoint
+        assert _is_anthropic_compat_endpoint("custom", "https://api.minimax.io/anthropic")
+        assert _is_anthropic_compat_endpoint("custom", "https://example.com/anthropic/v1")
+        assert not _is_anthropic_compat_endpoint("custom", "https://api.openai.com/v1")
+
+    def test_base64_image_converted(self):
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        messages = [{
+            "role": "user",
+            "content": [
+                {"type": "text", "text": "describe"},
+                {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBOR="}}
+            ]
+        }]
+        result = _convert_openai_images_to_anthropic(messages)
+        img_block = result[0]["content"][1]
+        assert img_block["type"] == "image"
+        assert img_block["source"]["type"] == "base64"
+        assert img_block["source"]["media_type"] == "image/png"
+        assert img_block["source"]["data"] == "iVBOR="
+
+    def test_url_image_converted(self):
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        messages = [{
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": "https://example.com/img.jpg"}}
+            ]
+        }]
+        result = _convert_openai_images_to_anthropic(messages)
+        img_block = result[0]["content"][0]
+        assert img_block["type"] == "image"
+        assert img_block["source"]["type"] == "url"
+        assert img_block["source"]["url"] == "https://example.com/img.jpg"
+
+    def test_text_only_messages_unchanged(self):
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        messages = [{"role": "user", "content": "Hello"}]
+        result = _convert_openai_images_to_anthropic(messages)
+        assert result[0] is messages[0]  # same object, not copied
+
+    def test_jpeg_media_type_parsed(self):
+        from agent.auxiliary_client import _convert_openai_images_to_anthropic
+        messages = [{
+            "role": "user",
+            "content": [
+                {"type": "image_url", "image_url": {"url": "data:image/jpeg;base64,/9j/="}}
+            ]
+        }]
+        result = _convert_openai_images_to_anthropic(messages)
+        assert result[0]["content"][0]["source"]["media_type"] == "image/jpeg"
diff --git a/tests/agent/test_models_dev.py b/tests/agent/test_models_dev.py
index 9f11d731e..be4b3b139 100644
--- a/tests/agent/test_models_dev.py
+++ b/tests/agent/test_models_dev.py
@@ -87,7 +87,10 @@ class TestProviderMapping:
 
     def test_unmapped_provider_not_in_dict(self):
         assert "nous" not in PROVIDER_TO_MODELS_DEV
-        assert "openai-codex" not in PROVIDER_TO_MODELS_DEV
+
+    def test_openai_codex_mapped_to_openai(self):
+        assert PROVIDER_TO_MODELS_DEV["openai"] == "openai"
+        assert PROVIDER_TO_MODELS_DEV["openai-codex"] == "openai"
 
 
 class TestExtractContext:
diff --git a/tests/hermes_cli/test_custom_provider_model_switch.py b/tests/hermes_cli/test_custom_provider_model_switch.py
index d48610a63..a0123670b 100644
--- a/tests/hermes_cli/test_custom_provider_model_switch.py
+++ b/tests/hermes_cli/test_custom_provider_model_switch.py
@@ -122,3 +122,54 @@ class TestCustomProviderModelSwitch:
         model = config.get("model")
         assert isinstance(model, dict)
         assert model["default"] == "model-X"
+
+    def test_api_mode_set_from_provider_info(self, config_home):
+        """When custom_providers entry has api_mode, it should be applied."""
+        import yaml
+        from hermes_cli.main import _model_flow_named_custom
+
+        provider_info = {
+            "name": "Anthropic Proxy",
+            "base_url": "https://proxy.example.com/anthropic",
+            "api_key": "***",
+            "model": "claude-3",
+            "api_mode": "anthropic_messages",
+        }
+
+        with patch("hermes_cli.models.fetch_api_models", return_value=["claude-3"]), \
+                patch.dict("sys.modules", {"simple_term_menu": None}), \
+                patch("builtins.input", return_value="1"), \
+                patch("builtins.print"):
+            _model_flow_named_custom({}, provider_info)
+
+        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
+        model = config.get("model")
+        assert isinstance(model, dict)
+        assert model.get("api_mode") == "anthropic_messages"
+
+    def test_api_mode_cleared_when_not_specified(self, config_home):
+        """When custom_providers entry has no api_mode, stale api_mode is removed."""
+        import yaml
+        from hermes_cli.main import _model_flow_named_custom
+
+        # Pre-seed a stale api_mode in config
+        config_path = config_home / "config.yaml"
+        config_path.write_text(yaml.dump({"model": {"api_mode": "anthropic_messages"}}))
+
+        provider_info = {
+            "name": "My vLLM",
+            "base_url": "https://vllm.example.com/v1",
+            "api_key": "***",
+            "model": "llama-3",
+        }
+
+        with patch("hermes_cli.models.fetch_api_models", return_value=["llama-3"]), \
+                patch.dict("sys.modules", {"simple_term_menu": None}), \
+                patch("builtins.input", return_value="1"), \
+                patch("builtins.print"):
+            _model_flow_named_custom({}, provider_info)
+
+        config = yaml.safe_load((config_home / "config.yaml").read_text()) or {}
+        model = config.get("model")
+        assert isinstance(model, dict)
+        assert "api_mode" not in model, "Stale api_mode should be removed"