diff --git a/agent/anthropic_adapter.py b/agent/anthropic_adapter.py index 630656a2b..830c0f4de 100644 --- a/agent/anthropic_adapter.py +++ b/agent/anthropic_adapter.py @@ -60,6 +60,8 @@ _ANTHROPIC_OUTPUT_LIMITS = { "claude-3-opus": 4_096, "claude-3-sonnet": 4_096, "claude-3-haiku": 4_096, + # Third-party Anthropic-compatible providers + "minimax": 131_072, } # For any model not in the table, assume the highest current limit. @@ -1313,9 +1315,10 @@ def build_anthropic_kwargs( # Map reasoning_config to Anthropic's thinking parameter. # Claude 4.6 models use adaptive thinking + output_config.effort. # Older models use manual thinking with budget_tokens. - # Haiku and MiniMax models do NOT support extended thinking — skip entirely. + # MiniMax Anthropic-compat endpoints support thinking (manual mode only, + # not adaptive). Haiku does NOT support extended thinking — skip entirely. if reasoning_config and isinstance(reasoning_config, dict): - if reasoning_config.get("enabled") is not False and "haiku" not in model.lower() and "minimax" not in model.lower(): + if reasoning_config.get("enabled") is not False and "haiku" not in model.lower(): effort = str(reasoning_config.get("effort", "medium")).lower() budget = THINKING_BUDGET.get(effort, 8000) if _supports_adaptive_thinking(model): diff --git a/agent/model_metadata.py b/agent/model_metadata.py index 2d1c02ac9..31d220110 100644 --- a/agent/model_metadata.py +++ b/agent/model_metadata.py @@ -115,15 +115,9 @@ DEFAULT_CONTEXT_LENGTHS = { "llama": 131072, # Qwen "qwen": 131072, - # MiniMax (lowercase — lookup lowercases model names at line 973) - "minimax-m1-256k": 1000000, - "minimax-m1-128k": 1000000, - "minimax-m1-80k": 1000000, - "minimax-m1-40k": 1000000, - "minimax-m1": 1000000, - "minimax-m2.5": 1048576, - "minimax-m2.7": 1048576, - "minimax": 1048576, + # MiniMax — official docs: 204,800 context for all models + # https://platform.minimax.io/docs/api-reference/text-anthropic-api + "minimax": 204800, # GLM "glm": 202752, # xAI Grok — xAI /v1/models does not return context_length metadata, @@ -151,7 +145,7 @@ DEFAULT_CONTEXT_LENGTHS = { "deepseek-ai/DeepSeek-V3.2": 65536, "moonshotai/Kimi-K2.5": 262144, "moonshotai/Kimi-K2-Thinking": 262144, - "MiniMaxAI/MiniMax-M2.5": 1048576, + "MiniMaxAI/MiniMax-M2.5": 204800, "XiaomiMiMo/MiMo-V2-Flash": 32768, "mimo-v2-pro": 1048576, "mimo-v2-omni": 1048576, diff --git a/hermes_cli/doctor.py b/hermes_cli/doctor.py index 1a2f839c0..46242b68c 100644 --- a/hermes_cli/doctor.py +++ b/hermes_cli/doctor.py @@ -722,9 +722,9 @@ def run_doctor(args): ("DeepSeek", ("DEEPSEEK_API_KEY",), "https://api.deepseek.com/v1/models", "DEEPSEEK_BASE_URL", True), ("Hugging Face", ("HF_TOKEN",), "https://router.huggingface.co/v1/models", "HF_BASE_URL", True), ("Alibaba/DashScope", ("DASHSCOPE_API_KEY",), "https://dashscope-intl.aliyuncs.com/compatible-mode/v1/models", "DASHSCOPE_BASE_URL", True), - # MiniMax APIs don't support /models endpoint — https://github.com/NousResearch/hermes-agent/issues/811 - ("MiniMax", ("MINIMAX_API_KEY",), None, "MINIMAX_BASE_URL", False), - ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), None, "MINIMAX_CN_BASE_URL", False), + # MiniMax: the /anthropic endpoint doesn't support /models, but the /v1 endpoint does. + ("MiniMax", ("MINIMAX_API_KEY",), "https://api.minimax.io/v1/models", "MINIMAX_BASE_URL", True), + ("MiniMax (China)", ("MINIMAX_CN_API_KEY",), "https://api.minimaxi.com/v1/models", "MINIMAX_CN_BASE_URL", True), ("AI Gateway", ("AI_GATEWAY_API_KEY",), "https://ai-gateway.vercel.sh/v1/models", "AI_GATEWAY_BASE_URL", True), ("Kilo Code", ("KILOCODE_API_KEY",), "https://api.kilo.ai/api/gateway/models", "KILOCODE_BASE_URL", True), ("OpenCode Zen", ("OPENCODE_ZEN_API_KEY",), "https://opencode.ai/zen/v1/models", "OPENCODE_ZEN_BASE_URL", True), @@ -749,6 +749,11 @@ def run_doctor(args): # Auto-detect Kimi Code keys (sk-kimi-) → api.kimi.com if not _base and _key.startswith("sk-kimi-"): _base = "https://api.kimi.com/coding/v1" + # Anthropic-compat endpoints (/anthropic) don't support /models. + # Rewrite to the OpenAI-compat /v1 surface for health checks. + if _base and _base.rstrip("/").endswith("/anthropic"): + from agent.auxiliary_client import _to_openai_base_url + _base = _to_openai_base_url(_base) _url = (_base.rstrip("/") + "/models") if _base else _default_url _headers = {"Authorization": f"Bearer {_key}"} if "api.kimi.com" in _url.lower(): diff --git a/hermes_cli/models.py b/hermes_cli/models.py index 0d9929486..dda8f94bf 100644 --- a/hermes_cli/models.py +++ b/hermes_cli/models.py @@ -157,22 +157,16 @@ _PROVIDER_MODELS: dict[str, list[str]] = { "kimi-k2-0905-preview", ], "minimax": [ - "MiniMax-M1", - "MiniMax-M1-40k", - "MiniMax-M1-80k", - "MiniMax-M1-128k", - "MiniMax-M1-256k", - "MiniMax-M2.5", "MiniMax-M2.7", + "MiniMax-M2.5", + "MiniMax-M2.1", + "MiniMax-M2", ], "minimax-cn": [ - "MiniMax-M1", - "MiniMax-M1-40k", - "MiniMax-M1-80k", - "MiniMax-M1-128k", - "MiniMax-M1-256k", - "MiniMax-M2.5", "MiniMax-M2.7", + "MiniMax-M2.5", + "MiniMax-M2.1", + "MiniMax-M2", ], "anthropic": [ "claude-opus-4-6", diff --git a/hermes_cli/providers.py b/hermes_cli/providers.py index 899c35874..78be527db 100644 --- a/hermes_cli/providers.py +++ b/hermes_cli/providers.py @@ -88,11 +88,11 @@ HERMES_OVERLAYS: Dict[str, HermesOverlay] = { base_url_env_var="KIMI_BASE_URL", ), "minimax": HermesOverlay( - transport="openai_chat", + transport="anthropic_messages", base_url_env_var="MINIMAX_BASE_URL", ), "minimax-cn": HermesOverlay( - transport="openai_chat", + transport="anthropic_messages", base_url_env_var="MINIMAX_CN_BASE_URL", ), "deepseek": HermesOverlay( diff --git a/hermes_cli/setup.py b/hermes_cli/setup.py index 216ab54a5..26a0f3c37 100644 --- a/hermes_cli/setup.py +++ b/hermes_cli/setup.py @@ -106,8 +106,8 @@ _DEFAULT_PROVIDER_MODELS = { ], "zai": ["glm-5", "glm-4.7", "glm-4.5", "glm-4.5-flash"], "kimi-coding": ["kimi-k2.5", "kimi-k2-thinking", "kimi-k2-turbo-preview"], - "minimax": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"], - "minimax-cn": ["MiniMax-M1", "MiniMax-M1-40k", "MiniMax-M1-80k", "MiniMax-M1-128k", "MiniMax-M1-256k", "MiniMax-M2.5", "MiniMax-M2.7"], + "minimax": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"], + "minimax-cn": ["MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"], "ai-gateway": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5", "google/gemini-3-flash"], "kilocode": ["anthropic/claude-opus-4.6", "anthropic/claude-sonnet-4.6", "openai/gpt-5.4", "google/gemini-3-pro-preview", "google/gemini-3-flash-preview"], "opencode-zen": ["gpt-5.4", "gpt-5.3-codex", "claude-sonnet-4-6", "gemini-3-flash", "glm-5", "kimi-k2.5", "minimax-m2.7"], diff --git a/run_agent.py b/run_agent.py index 428154197..2901ef10a 100644 --- a/run_agent.py +++ b/run_agent.py @@ -766,7 +766,7 @@ class AIAgent: # conversation prefix. Uses system_and_3 strategy (4 breakpoints). is_openrouter = self._is_openrouter_url() is_claude = "claude" in self.model.lower() - is_native_anthropic = self.api_mode == "anthropic_messages" + is_native_anthropic = self.api_mode == "anthropic_messages" and self.provider == "anthropic" self._use_prompt_caching = (is_openrouter and is_claude) or is_native_anthropic self._cache_ttl = "5m" # Default 5-minute TTL (1.25x write cost) @@ -1510,7 +1510,11 @@ class AIAgent: resolve_anthropic_token, _is_oauth_token, ) - effective_key = api_key or self.api_key or resolve_anthropic_token() or "" + # Only fall back to ANTHROPIC_TOKEN when the provider is actually Anthropic. + # Other anthropic_messages providers (MiniMax, Alibaba, etc.) must use their own + # API key — falling back would send Anthropic credentials to third-party endpoints. + _is_native_anthropic = new_provider == "anthropic" + effective_key = (api_key or self.api_key or resolve_anthropic_token() or "") if _is_native_anthropic else (api_key or self.api_key or "") self.api_key = effective_key self._anthropic_api_key = effective_key self._anthropic_base_url = base_url or getattr(self, "_anthropic_base_url", None) @@ -1534,7 +1538,7 @@ class AIAgent: ) # ── Re-evaluate prompt caching ── - is_native_anthropic = api_mode == "anthropic_messages" + is_native_anthropic = api_mode == "anthropic_messages" and new_provider == "anthropic" self._use_prompt_caching = ( ("openrouter" in (self.base_url or "").lower() and "claude" in new_model.lower()) or is_native_anthropic @@ -5297,7 +5301,7 @@ class AIAgent: } # Re-evaluate prompt caching for the new provider/model - is_native_anthropic = fb_api_mode == "anthropic_messages" + is_native_anthropic = fb_api_mode == "anthropic_messages" and fb_provider == "anthropic" self._use_prompt_caching = ( ("openrouter" in fb_base_url.lower() and "claude" in fb_model.lower()) or is_native_anthropic @@ -5633,11 +5637,12 @@ class AIAgent: def _anthropic_preserve_dots(self) -> bool: """True when using an anthropic-compatible endpoint that preserves dots in model names. Alibaba/DashScope keeps dots (e.g. qwen3.5-plus). + MiniMax keeps dots (e.g. MiniMax-M2.7). OpenCode Go keeps dots (e.g. minimax-m2.7).""" - if (getattr(self, "provider", "") or "").lower() in {"alibaba", "opencode-go"}: + if (getattr(self, "provider", "") or "").lower() in {"alibaba", "minimax", "minimax-cn", "opencode-go"}: return True base = (getattr(self, "base_url", "") or "").lower() - return "dashscope" in base or "aliyuncs" in base or "opencode.ai/zen/go" in base + return "dashscope" in base or "aliyuncs" in base or "minimax" in base or "opencode.ai/zen/go" in base def _is_qwen_portal(self) -> bool: """Return True when the base URL targets Qwen Portal.""" diff --git a/tests/agent/test_minimax_provider.py b/tests/agent/test_minimax_provider.py index 23bdcd476..1673bfd94 100644 --- a/tests/agent/test_minimax_provider.py +++ b/tests/agent/test_minimax_provider.py @@ -1,37 +1,37 @@ -"""Tests for MiniMax provider hardening — context lengths, thinking guard, catalog, beta headers.""" +"""Tests for MiniMax provider hardening — context lengths, thinking, catalog, beta headers, transport.""" from unittest.mock import patch class TestMinimaxContextLengths: - """Verify per-model context length entries for MiniMax models.""" + """Verify context length entries match official docs (204,800 for all models). - def test_m1_variants_have_1m_context(self): + Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api + """ + + def test_minimax_prefix_has_correct_context(self): from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS - # Keys are lowercase because the lookup lowercases model names - for model in ("minimax-m1", "minimax-m1-40k", "minimax-m1-80k", - "minimax-m1-128k", "minimax-m1-256k"): - assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths" - assert DEFAULT_CONTEXT_LENGTHS[model] == 1_000_000, f"{model} expected 1M" + assert DEFAULT_CONTEXT_LENGTHS["minimax"] == 204_800 - def test_m2_variants_have_1m_context(self): - from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS - # Keys are lowercase because the lookup lowercases model names - for model in ("minimax-m2.5", "minimax-m2.7"): - assert model in DEFAULT_CONTEXT_LENGTHS, f"{model} missing from context lengths" - assert DEFAULT_CONTEXT_LENGTHS[model] == 1_048_576, f"{model} expected 1048576" - - def test_minimax_prefix_fallback(self): - from agent.model_metadata import DEFAULT_CONTEXT_LENGTHS - # The generic "minimax" prefix entry should be 1M for unknown models - assert DEFAULT_CONTEXT_LENGTHS["minimax"] == 1_048_576 + def test_minimax_models_resolve_via_prefix(self): + from agent.model_metadata import get_model_context_length + # All MiniMax models should resolve to 204,800 via the "minimax" prefix + for model in ("MiniMax-M2.7", "MiniMax-M2.5", "MiniMax-M2.1", "MiniMax-M2"): + ctx = get_model_context_length(model, "") + assert ctx == 204_800, f"{model} expected 204800, got {ctx}" -class TestMinimaxThinkingGuard: - """Verify that build_anthropic_kwargs does NOT add thinking params for MiniMax models.""" +class TestMinimaxThinkingSupport: + """Verify that MiniMax gets manual thinking (not adaptive). - def test_no_thinking_for_minimax_m27(self): + MiniMax's Anthropic-compat endpoint officially supports the thinking + parameter (https://platform.minimax.io/docs/api-reference/text-anthropic-api). + It should get manual thinking (type=enabled + budget_tokens), NOT adaptive + thinking (which is Claude 4.6-only). + """ + + def test_minimax_m27_gets_manual_thinking(self): from agent.anthropic_adapter import build_anthropic_kwargs kwargs = build_anthropic_kwargs( model="MiniMax-M2.7", @@ -40,19 +40,23 @@ class TestMinimaxThinkingGuard: max_tokens=4096, reasoning_config={"enabled": True, "effort": "medium"}, ) - assert "thinking" not in kwargs + assert "thinking" in kwargs + assert kwargs["thinking"]["type"] == "enabled" + assert "budget_tokens" in kwargs["thinking"] + # MiniMax should NOT get adaptive thinking or output_config assert "output_config" not in kwargs - def test_no_thinking_for_minimax_m1(self): + def test_minimax_m25_gets_manual_thinking(self): from agent.anthropic_adapter import build_anthropic_kwargs kwargs = build_anthropic_kwargs( - model="MiniMax-M1-128k", + model="MiniMax-M2.5", messages=[{"role": "user", "content": "hello"}], tools=None, max_tokens=4096, reasoning_config={"enabled": True, "effort": "high"}, ) - assert "thinking" not in kwargs + assert "thinking" in kwargs + assert kwargs["thinking"]["type"] == "enabled" def test_thinking_still_works_for_claude(self): from agent.anthropic_adapter import build_anthropic_kwargs @@ -81,25 +85,30 @@ class TestMinimaxAuxModel: class TestMinimaxModelCatalog: - """Verify the model catalog includes M1 family and excludes deprecated models.""" + """Verify the model catalog matches official Anthropic-compat endpoint models. - def test_catalog_includes_m1_family(self): + Source: https://platform.minimax.io/docs/api-reference/text-anthropic-api + """ + + def test_catalog_includes_current_models(self): from hermes_cli.models import _PROVIDER_MODELS for provider in ("minimax", "minimax-cn"): models = _PROVIDER_MODELS[provider] - assert "MiniMax-M1" in models - assert "MiniMax-M1-40k" in models - assert "MiniMax-M1-80k" in models - assert "MiniMax-M1-128k" in models - assert "MiniMax-M1-256k" in models + assert "MiniMax-M2.7" in models + assert "MiniMax-M2.5" in models + assert "MiniMax-M2.1" in models + assert "MiniMax-M2" in models - def test_catalog_excludes_deprecated(self): + def test_catalog_excludes_m1_family(self): + """M1 models are not available on the /anthropic endpoint.""" from hermes_cli.models import _PROVIDER_MODELS for provider in ("minimax", "minimax-cn"): models = _PROVIDER_MODELS[provider] - assert "MiniMax-M2.1" not in models + assert "MiniMax-M1" not in models def test_catalog_excludes_highspeed(self): + """Highspeed variants are available but not shown in default catalog + (users can still specify them manually).""" from hermes_cli.models import _PROVIDER_MODELS for provider in ("minimax", "minimax-cn"): models = _PROVIDER_MODELS[provider] @@ -202,3 +211,154 @@ class TestMinimaxBetaHeaders: def test_common_betas_regular_url(self): from agent.anthropic_adapter import _common_betas_for_base_url, _COMMON_BETAS assert _common_betas_for_base_url("https://api.anthropic.com") == _COMMON_BETAS + + +class TestMinimaxApiMode: + """Verify determine_api_mode returns anthropic_messages for MiniMax providers. + + The MiniMax /anthropic endpoint speaks Anthropic Messages wire format, + not OpenAI chat completions. The overlay transport must reflect this + so that code paths calling determine_api_mode() without a base_url + (e.g. /model switch) get the correct api_mode. + """ + + def test_minimax_returns_anthropic_messages(self): + from hermes_cli.providers import determine_api_mode + assert determine_api_mode("minimax") == "anthropic_messages" + + def test_minimax_cn_returns_anthropic_messages(self): + from hermes_cli.providers import determine_api_mode + assert determine_api_mode("minimax-cn") == "anthropic_messages" + + def test_minimax_with_url_also_works(self): + from hermes_cli.providers import determine_api_mode + # Even with explicit base_url, provider lookup takes priority + assert determine_api_mode("minimax", "https://api.minimax.io/anthropic") == "anthropic_messages" + + def test_anthropic_still_returns_anthropic_messages(self): + from hermes_cli.providers import determine_api_mode + assert determine_api_mode("anthropic") == "anthropic_messages" + + def test_openai_returns_chat_completions(self): + from hermes_cli.providers import determine_api_mode + # Sanity check: standard providers are unaffected + result = determine_api_mode("deepseek") + assert result == "chat_completions" + + +class TestMinimaxMaxOutput: + """Verify _get_anthropic_max_output returns correct limits for MiniMax models. + + MiniMax max output is 131,072 tokens (source: OpenClaw model definitions, + cross-referenced with MiniMax API behavior). + """ + + def test_minimax_m27_output_limit(self): + from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("MiniMax-M2.7") == 131_072 + + def test_minimax_m25_output_limit(self): + from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("MiniMax-M2.5") == 131_072 + + def test_minimax_m2_output_limit(self): + from agent.anthropic_adapter import _get_anthropic_max_output + assert _get_anthropic_max_output("MiniMax-M2") == 131_072 + + def test_claude_output_unaffected(self): + from agent.anthropic_adapter import _get_anthropic_max_output + # Sanity: Claude limits are not broken by the MiniMax entry + assert _get_anthropic_max_output("claude-sonnet-4-6") == 64_000 + + +class TestMinimaxPreserveDots: + """Verify that MiniMax model names preserve dots through the Anthropic adapter. + + MiniMax model IDs like 'MiniMax-M2.7' must NOT have dots converted to + hyphens — the endpoint expects the exact name with dots. + """ + + def test_minimax_provider_preserves_dots(self): + from types import SimpleNamespace + agent = SimpleNamespace(provider="minimax", base_url="") + from run_agent import AIAgent + assert AIAgent._anthropic_preserve_dots(agent) is True + + def test_minimax_cn_provider_preserves_dots(self): + from types import SimpleNamespace + agent = SimpleNamespace(provider="minimax-cn", base_url="") + from run_agent import AIAgent + assert AIAgent._anthropic_preserve_dots(agent) is True + + def test_minimax_url_preserves_dots(self): + from types import SimpleNamespace + agent = SimpleNamespace(provider="custom", base_url="https://api.minimax.io/anthropic") + from run_agent import AIAgent + assert AIAgent._anthropic_preserve_dots(agent) is True + + def test_minimax_cn_url_preserves_dots(self): + from types import SimpleNamespace + agent = SimpleNamespace(provider="custom", base_url="https://api.minimaxi.com/anthropic") + from run_agent import AIAgent + assert AIAgent._anthropic_preserve_dots(agent) is True + + def test_anthropic_does_not_preserve_dots(self): + from types import SimpleNamespace + agent = SimpleNamespace(provider="anthropic", base_url="https://api.anthropic.com") + from run_agent import AIAgent + assert AIAgent._anthropic_preserve_dots(agent) is False + + def test_normalize_preserves_m27_dot(self): + from agent.anthropic_adapter import normalize_model_name + assert normalize_model_name("MiniMax-M2.7", preserve_dots=True) == "MiniMax-M2.7" + + def test_normalize_converts_without_preserve(self): + from agent.anthropic_adapter import normalize_model_name + # Without preserve_dots, dots become hyphens (broken for MiniMax) + assert normalize_model_name("MiniMax-M2.7", preserve_dots=False) == "MiniMax-M2-7" + + +class TestMinimaxSwitchModelCredentialGuard: + """Verify switch_model() does not leak Anthropic credentials to MiniMax. + + The __init__ path correctly guards against this (line 761), but switch_model() + must mirror that guard. Without it, /model switch to minimax with no explicit + api_key would fall back to resolve_anthropic_token() and send Anthropic creds + to the MiniMax endpoint. + """ + + def test_switch_to_minimax_does_not_resolve_anthropic_token(self): + """switch_model() should NOT call resolve_anthropic_token() for MiniMax.""" + from unittest.mock import patch, MagicMock + + with patch("run_agent.AIAgent.__init__", return_value=None): + from run_agent import AIAgent + agent = AIAgent.__new__(AIAgent) + agent.provider = "anthropic" + agent.model = "claude-sonnet-4" + agent.api_key = "sk-ant-fake" + agent.base_url = "https://api.anthropic.com" + agent.api_mode = "anthropic_messages" + agent._anthropic_base_url = "https://api.anthropic.com" + agent._anthropic_api_key = "sk-ant-fake" + agent._is_anthropic_oauth = False + agent._client_kwargs = {} + agent.client = None + agent._anthropic_client = MagicMock() + + with patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, \ + patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-leaked") as mock_resolve, \ + patch("agent.anthropic_adapter._is_oauth_token", return_value=False): + + agent.switch_model( + new_model="MiniMax-M2.7", + new_provider="minimax", + api_mode="anthropic_messages", + api_key="mm-key-123", + base_url="https://api.minimax.io/anthropic", + ) + # resolve_anthropic_token should NOT be called for non-Anthropic providers + mock_resolve.assert_not_called() + # The key passed to build_anthropic_client should be the MiniMax key + build_args = mock_build.call_args + assert build_args[0][0] == "mm-key-123" diff --git a/trajectory_compressor.py b/trajectory_compressor.py index 583db8af2..6bc0a499e 100644 --- a/trajectory_compressor.py +++ b/trajectory_compressor.py @@ -375,8 +375,9 @@ class TrajectoryCompressor: f"Missing API key. Set {self.config.api_key_env} " f"environment variable.") from openai import OpenAI + from agent.auxiliary_client import _to_openai_base_url self.client = OpenAI( - api_key=api_key, base_url=self.config.base_url) + api_key=api_key, base_url=_to_openai_base_url(self.config.base_url)) # AsyncOpenAI is created lazily in _get_async_client() so it # binds to the current event loop — avoids "Event loop is closed" # when process_directory() is called multiple times (each call @@ -395,10 +396,11 @@ class TrajectoryCompressor: avoiding "Event loop is closed" errors on repeated calls. """ from openai import AsyncOpenAI + from agent.auxiliary_client import _to_openai_base_url # Always create a fresh client so it binds to the running loop. self.async_client = AsyncOpenAI( api_key=self._async_client_api_key, - base_url=self.config.base_url, + base_url=_to_openai_base_url(self.config.base_url), ) return self.async_client