Merge remote-tracking branch 'origin/main' into sid/types-and-lints

# Conflicts:
#	gateway/run.py
#	tools/delegate_tool.py
This commit is contained in:
alt-glitch 2026-04-22 14:27:37 +05:30
commit 847ffca715
171 changed files with 15125 additions and 1675 deletions

View file

@ -476,6 +476,133 @@ class TestGetTextAuxiliaryClient:
assert isinstance(client, CodexAuxiliaryClient)
assert model == "gpt-5.2-codex"
class TestNousAuxiliaryRefresh:
    """Nous auxiliary-client behavior: runtime credential refresh, Portal
    model recommendations (text vs. vision), and 401 retry with a fresh key.

    All collaborators of ``agent.auxiliary_client`` are patched; no network
    traffic occurs.
    """

    def test_try_nous_prefers_runtime_credentials(self):
        # A stale on-disk token must lose to the freshly resolved runtime
        # agent key/base returned by _resolve_nous_runtime_api.
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "stale-token"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value=None),
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            mock_openai.return_value = MagicMock()
            client, model = _try_nous()
            assert client is not None
            # No Portal recommendation → falls back to the hardcoded default.
            assert model == "google/gemini-3-flash-preview"
            # The OpenAI client must be constructed from the runtime key and
            # base URL, not the stale on-disk token.
            assert mock_openai.call_args.kwargs["api_key"] == "fresh-agent-key"
            assert mock_openai.call_args.kwargs["base_url"] == fresh_base

    def test_try_nous_uses_portal_recommendation_for_text(self):
        """When the Portal recommends a compaction model, _try_nous honors it."""
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="minimax/minimax-m2.7") as mock_rec,
            patch("agent.auxiliary_client.OpenAI") as mock_openai,
        ):
            from agent.auxiliary_client import _try_nous

            mock_openai.return_value = MagicMock()
            client, model = _try_nous(vision=False)
            assert client is not None
            assert model == "minimax/minimax-m2.7"
            # Text path must request the non-vision recommendation.
            assert mock_rec.call_args.kwargs["vision"] is False

    def test_try_nous_uses_portal_recommendation_for_vision(self):
        """Vision tasks should ask for the vision-specific recommendation."""
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", return_value="google/gemini-3-flash-preview") as mock_rec,
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous(vision=True)
            assert client is not None
            assert model == "google/gemini-3-flash-preview"
            assert mock_rec.call_args.kwargs["vision"] is True

    def test_try_nous_falls_back_when_recommendation_lookup_raises(self):
        """If the Portal lookup throws, we must still return a usable model."""
        fresh_base = "https://inference-api.nousresearch.com/v1"
        with (
            patch("agent.auxiliary_client._read_nous_auth", return_value={"access_token": "***"}),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", fresh_base)),
            patch("hermes_cli.models.get_nous_recommended_aux_model", side_effect=RuntimeError("portal down")),
            patch("agent.auxiliary_client.OpenAI"),
        ):
            from agent.auxiliary_client import _try_nous

            client, model = _try_nous()
            assert client is not None
            # Lookup failure degrades to the hardcoded default model.
            assert model == "google/gemini-3-flash-preview"

    def test_call_llm_retries_nous_after_401(self):
        # A 401 from a stale cached Nous client must trigger exactly one
        # retry on a freshly built client; the retry's response is returned.
        class _Auth401(Exception):
            # Mimics the SDK auth-error shape; only status_code is read.
            status_code = 401

        stale_client = MagicMock()
        stale_client.base_url = "https://inference-api.nousresearch.com/v1"
        stale_client.chat.completions.create.side_effect = _Auth401("stale nous key")
        fresh_client = MagicMock()
        fresh_client.base_url = "https://inference-api.nousresearch.com/v1"
        fresh_client.chat.completions.create.return_value = {"ok": True}
        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
            patch("agent.auxiliary_client.OpenAI", return_value=fresh_client),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
        ):
            result = call_llm(
                task="compression",
                messages=[{"role": "user", "content": "hi"}],
            )
            assert result == {"ok": True}
            # Exactly one failed attempt and one successful retry.
            assert stale_client.chat.completions.create.call_count == 1
            assert fresh_client.chat.completions.create.call_count == 1

    @pytest.mark.asyncio
    async def test_async_call_llm_retries_nous_after_401(self):
        # Async mirror of the 401-retry path: the refreshed client comes from
        # _to_async_client instead of a direct OpenAI() construction.
        class _Auth401(Exception):
            status_code = 401

        stale_client = MagicMock()
        stale_client.base_url = "https://inference-api.nousresearch.com/v1"
        stale_client.chat.completions.create = AsyncMock(side_effect=_Auth401("stale nous key"))
        fresh_async_client = MagicMock()
        fresh_async_client.base_url = "https://inference-api.nousresearch.com/v1"
        fresh_async_client.chat.completions.create = AsyncMock(return_value={"ok": True})
        with (
            patch("agent.auxiliary_client._resolve_task_provider_model", return_value=("nous", "nous-model", None, None, None)),
            patch("agent.auxiliary_client._get_cached_client", return_value=(stale_client, "nous-model")),
            patch("agent.auxiliary_client._to_async_client", return_value=(fresh_async_client, "nous-model")),
            patch("agent.auxiliary_client._validate_llm_response", side_effect=lambda resp, _task: resp),
            patch("agent.auxiliary_client._resolve_nous_runtime_api", return_value=("fresh-agent-key", "https://inference-api.nousresearch.com/v1")),
        ):
            result = await async_call_llm(
                task="session_search",
                messages=[{"role": "user", "content": "hi"}],
            )
            assert result == {"ok": True}
            # One awaited failure on the stale client, one awaited success.
            assert stale_client.chat.completions.create.await_count == 1
            assert fresh_async_client.chat.completions.create.await_count == 1
# ── Payment / credit exhaustion fallback ─────────────────────────────────

View file

@ -167,7 +167,7 @@ class TestResolveAutoMainFirst:
class TestResolveVisionMainFirst:
"""Vision auto-detection prefers main provider + main model first."""
"""Vision auto-detection prefers the main provider first."""
def test_openrouter_main_vision_uses_main_model(self, monkeypatch):
"""OpenRouter main with vision-capable model → aux vision uses main model."""
@ -200,28 +200,49 @@ class TestResolveVisionMainFirst:
assert mock_resolve.call_args.args[0] == "openrouter"
assert mock_resolve.call_args.args[1] == "anthropic/claude-sonnet-4.6"
def test_nous_main_vision_uses_main_model(self):
"""Nous Portal main → aux vision uses main model, not free-tier MiMo-V2-Omni."""
def test_nous_main_vision_uses_paid_nous_vision_backend(self):
"""Paid Nous main → aux vision uses the dedicated Nous vision backend."""
with patch(
"agent.auxiliary_client._read_main_provider", return_value="nous",
), patch(
"agent.auxiliary_client._read_main_model",
return_value="openai/gpt-5",
), patch(
"agent.auxiliary_client.resolve_provider_client"
) as mock_resolve, patch(
"agent.auxiliary_client._resolve_task_provider_model",
return_value=("auto", None, None, None, None),
), patch(
"agent.auxiliary_client._resolve_strict_vision_backend",
return_value=(MagicMock(), "google/gemini-3-flash-preview"),
):
mock_client = MagicMock()
mock_resolve.return_value = (mock_client, "openai/gpt-5")
from agent.auxiliary_client import resolve_vision_provider_client
provider, client, model = resolve_vision_provider_client()
assert provider == "nous"
assert model == "openai/gpt-5"
assert client is not None
assert model == "google/gemini-3-flash-preview"
    def test_nous_main_vision_uses_free_tier_nous_vision_backend(self):
        """Free-tier Nous main → aux vision uses MiMo omni, not the text main model."""
        with patch(
            "agent.auxiliary_client._read_main_provider", return_value="nous",
        ), patch(
            "agent.auxiliary_client._read_main_model",
            return_value="xiaomi/mimo-v2-pro",
        ), patch(
            "agent.auxiliary_client._resolve_task_provider_model",
            return_value=("auto", None, None, None, None),
        ), patch(
            "agent.auxiliary_client._resolve_strict_vision_backend",
            return_value=(MagicMock(), "xiaomi/mimo-v2-omni"),
        ):
            from agent.auxiliary_client import resolve_vision_provider_client

            provider, client, model = resolve_vision_provider_client()
            # Provider stays "nous"; the model is whatever the strict vision
            # backend resolver returned (the omni variant here).
            assert provider == "nous"
            assert client is not None
            assert model == "xiaomi/mimo-v2-omni"
def test_exotic_provider_with_vision_override_preserved(self):
"""xiaomi → mimo-v2-omni override still wins over main_model."""

View file

@ -0,0 +1,111 @@
"""Tests for agent/image_gen_registry.py — provider registration & active lookup."""
from __future__ import annotations
import pytest
from agent import image_gen_registry
from agent.image_gen_provider import ImageGenProvider
class _FakeProvider(ImageGenProvider):
    """Minimal in-memory ImageGenProvider stub for registry tests."""

    def __init__(self, name: str, available: bool = True):
        # name keys the registry; available gates active-provider lookup.
        self._name = name
        self._available = available

    @property
    def name(self) -> str:
        return self._name

    def is_available(self) -> bool:
        return self._available

    def generate(self, prompt, aspect_ratio="landscape", **kw):
        # Echoes provider name + prompt so tests can trace which provider
        # produced the result.
        return {"success": True, "image": f"{self._name}://{prompt}"}
@pytest.fixture(autouse=True)
def _reset_registry():
    """Start and finish every test in this module with an empty registry."""
    wipe = image_gen_registry._reset_for_tests
    wipe()
    yield
    wipe()
class TestRegisterProvider:
    """register_provider() validation, overwrite semantics, and listing."""

    def test_register_and_lookup(self):
        # A registered provider is retrievable by name, identity-preserved.
        provider = _FakeProvider("fake")
        image_gen_registry.register_provider(provider)
        assert image_gen_registry.get_provider("fake") is provider

    def test_rejects_non_provider(self):
        # Objects that are not ImageGenProvider instances raise TypeError.
        with pytest.raises(TypeError):
            image_gen_registry.register_provider("not a provider")  # type: ignore[arg-type]

    def test_rejects_empty_name(self):
        # A provider whose name property is empty is rejected.
        class Empty(ImageGenProvider):
            @property
            def name(self) -> str:
                return ""

            def generate(self, prompt, aspect_ratio="landscape", **kw):
                return {}

        with pytest.raises(ValueError):
            image_gen_registry.register_provider(Empty())

    def test_reregister_overwrites(self):
        # Registering the same name twice keeps the most recent instance.
        a = _FakeProvider("same")
        b = _FakeProvider("same")
        image_gen_registry.register_provider(a)
        image_gen_registry.register_provider(b)
        assert image_gen_registry.get_provider("same") is b

    def test_list_is_sorted(self):
        # list_providers() returns providers sorted by name.
        image_gen_registry.register_provider(_FakeProvider("zeta"))
        image_gen_registry.register_provider(_FakeProvider("alpha"))
        names = [p.name for p in image_gen_registry.list_providers()]
        assert names == ["alpha", "zeta"]
class TestGetActiveProvider:
    """Active-provider resolution: auto, config override, fallback, empty."""

    def test_single_provider_autoresolves(self, tmp_path, monkeypatch):
        # With exactly one provider registered, it is chosen automatically.
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        image_gen_registry.register_provider(_FakeProvider("solo"))
        active = image_gen_registry.get_active_provider()
        assert active is not None and active.name == "solo"

    def test_fal_preferred_on_multi_without_config(self, tmp_path, monkeypatch):
        # Multiple providers and no config → "fal" wins as the default.
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        image_gen_registry.register_provider(_FakeProvider("fal"))
        image_gen_registry.register_provider(_FakeProvider("openai"))
        active = image_gen_registry.get_active_provider()
        assert active is not None and active.name == "fal"

    def test_explicit_config_wins(self, tmp_path, monkeypatch):
        # image_gen.provider in config.yaml overrides the default preference.
        import yaml

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        (tmp_path / "config.yaml").write_text(
            yaml.safe_dump({"image_gen": {"provider": "openai"}})
        )
        image_gen_registry.register_provider(_FakeProvider("fal"))
        image_gen_registry.register_provider(_FakeProvider("openai"))
        active = image_gen_registry.get_active_provider()
        assert active is not None and active.name == "openai"

    def test_missing_configured_provider_falls_back(self, tmp_path, monkeypatch):
        # Config names a provider that is not registered.
        import yaml

        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        (tmp_path / "config.yaml").write_text(
            yaml.safe_dump({"image_gen": {"provider": "replicate"}})
        )
        # Only FAL is registered — configured provider doesn't exist
        image_gen_registry.register_provider(_FakeProvider("fal"))
        active = image_gen_registry.get_active_provider()
        # Falls back to FAL preference (legacy default) rather than None
        assert active is not None and active.name == "fal"

    def test_none_when_empty(self, tmp_path, monkeypatch):
        # An empty registry yields None rather than raising.
        monkeypatch.setenv("HERMES_HOME", str(tmp_path))
        assert image_gen_registry.get_active_provider() is None

View file

@ -0,0 +1,115 @@
"""Regression guard: don't send Anthropic ``thinking`` to Kimi's /coding endpoint.
Kimi's ``api.kimi.com/coding`` endpoint speaks the Anthropic Messages protocol
but has its own thinking semantics. When ``thinking.enabled`` is present in
the request, Kimi validates the message history and requires every prior
assistant tool-call message to carry OpenAI-style ``reasoning_content``.
The Anthropic path never populates that field, and
``convert_messages_to_anthropic`` strips Anthropic thinking blocks on
third-party endpoints, so after one turn with tool calls the next request
fails with HTTP 400::
thinking is enabled but reasoning_content is missing in assistant
tool call message at index N
Kimi on the chat_completions route handles ``thinking`` via ``extra_body`` in
``ChatCompletionsTransport`` (#13503). On the Anthropic route the right
thing to do is drop the parameter entirely and let Kimi drive reasoning
server-side.
"""
from __future__ import annotations
import pytest
class TestKimiCodingSkipsAnthropicThinking:
    """build_anthropic_kwargs must not inject ``thinking`` for Kimi /coding."""

    @pytest.mark.parametrize(
        "base_url",
        [
            "https://api.kimi.com/coding",
            "https://api.kimi.com/coding/v1",
            "https://api.kimi.com/coding/anthropic",
            "https://api.kimi.com/coding/",
        ],
    )
    def test_kimi_coding_endpoint_omits_thinking(self, base_url: str) -> None:
        # Every /coding-prefixed URL variant must be recognized and have the
        # thinking parameter suppressed, even when reasoning is enabled.
        from agent.anthropic_adapter import build_anthropic_kwargs

        kwargs = build_anthropic_kwargs(
            model="kimi-k2.5",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "medium"},
            base_url=base_url,
        )
        assert "thinking" not in kwargs, (
            "Anthropic thinking must not be sent to Kimi /coding — "
            "endpoint requires reasoning_content on history we don't preserve."
        )
        assert "output_config" not in kwargs

    def test_kimi_coding_with_explicit_disabled_also_omits(self) -> None:
        # An explicit enabled=False must not serialize a thinking block either.
        from agent.anthropic_adapter import build_anthropic_kwargs

        kwargs = build_anthropic_kwargs(
            model="kimi-k2.5",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": False},
            base_url="https://api.kimi.com/coding",
        )
        assert "thinking" not in kwargs

    def test_non_kimi_third_party_still_gets_thinking(self) -> None:
        """MiniMax and other third-party Anthropic endpoints must retain thinking."""
        from agent.anthropic_adapter import build_anthropic_kwargs

        kwargs = build_anthropic_kwargs(
            model="MiniMax-M2.7",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "medium"},
            base_url="https://api.minimax.io/anthropic",
        )
        assert "thinking" in kwargs
        assert kwargs["thinking"]["type"] == "enabled"

    def test_native_anthropic_still_gets_thinking(self) -> None:
        # base_url=None → first-party Anthropic; thinking passes through.
        from agent.anthropic_adapter import build_anthropic_kwargs

        kwargs = build_anthropic_kwargs(
            model="claude-sonnet-4-20250514",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "medium"},
            base_url=None,
        )
        assert "thinking" in kwargs

    def test_kimi_root_endpoint_unaffected(self) -> None:
        """Only the /coding route is special-cased — plain api.kimi.com is not.

        ``api.kimi.com`` without ``/coding`` uses the chat_completions transport
        (see runtime_provider._detect_api_mode_for_url); build_anthropic_kwargs
        should never see it, but if it somehow does we should not suppress
        thinking there — that path has different semantics.
        """
        from agent.anthropic_adapter import build_anthropic_kwargs

        kwargs = build_anthropic_kwargs(
            model="kimi-k2.5",
            messages=[{"role": "user", "content": "hello"}],
            tools=None,
            max_tokens=4096,
            reasoning_config={"enabled": True, "effort": "medium"},
            base_url="https://api.kimi.com/v1",
        )
        assert "thinking" in kwargs

View file

@ -789,6 +789,24 @@ class TestPromptBuilderConstants:
assert "cron" in PLATFORM_HINTS
assert "cli" in PLATFORM_HINTS
    def test_cli_hint_does_not_suggest_media_tags(self):
        """CLI platform hint must discourage MEDIA: tags, not advertise them."""
        # Regression: MEDIA:/path tags are intercepted only by messaging
        # gateway platforms. On the CLI they render as literal text and
        # confuse users. The CLI hint must steer the agent away from them.
        cli_hint = PLATFORM_HINTS["cli"]
        assert "MEDIA:" in cli_hint, (
            "CLI hint should mention MEDIA: in order to tell the agent "
            "NOT to use it (negative guidance)."
        )
        # Must contain explicit "don't" language near the MEDIA reference.
        assert any(
            marker in cli_hint.lower()
            for marker in ("do not emit media", "not intercepted", "do not", "don't")
        ), "CLI hint should explicitly discourage MEDIA: tags."
        # Messaging hints should still advertise MEDIA: positively (sanity
        # check that this test is calibrated correctly).
        assert "include MEDIA:" in PLATFORM_HINTS["telegram"]
# =========================================================================
# Environment hints

View file

@ -193,7 +193,7 @@ class TestBuildChildProgressCallback:
# task_index=0 in a batch of 3 → prefix "[1]"
cb0 = _build_child_progress_callback(0, "test goal", parent, task_count=3)
cb0("web_search", "test")
cb0("tool.started", "web_search", "test", {})
output = buf.getvalue()
assert "[1]" in output
@ -201,7 +201,7 @@ class TestBuildChildProgressCallback:
buf.truncate(0)
buf.seek(0)
cb2 = _build_child_progress_callback(2, "test goal", parent, task_count=3)
cb2("web_search", "test")
cb2("tool.started", "web_search", "test", {})
output = buf.getvalue()
assert "[3]" in output

View file

@ -0,0 +1,164 @@
"""Tests for the BedrockTransport."""
import json
import pytest
from types import SimpleNamespace
from agent.transports import get_transport
from agent.transports.types import NormalizedResponse, ToolCall
@pytest.fixture
def transport():
    """Yield the Bedrock Converse transport (importing the module registers it)."""
    import agent.transports.bedrock  # noqa: F401  — import has the registration side effect
    bedrock_transport = get_transport("bedrock_converse")
    return bedrock_transport
class TestBedrockBasic:
    """Smoke checks: the transport exists and reports the expected api_mode."""

    def test_api_mode(self, transport):
        assert transport.api_mode == "bedrock_converse"

    def test_registered(self, transport):
        assert transport is not None
class TestBedrockBuildKwargs:
    """build_kwargs() maps generic call options onto Bedrock Converse fields."""

    def test_basic_kwargs(self, transport):
        msgs = [{"role": "user", "content": "Hello"}]
        kw = transport.build_kwargs(model="anthropic.claude-3-5-sonnet-20241022-v2:0", messages=msgs)
        # Bedrock uses modelId plus private dunder markers read by the
        # dispatch site; region defaults to us-east-1 when not supplied.
        assert kw["modelId"] == "anthropic.claude-3-5-sonnet-20241022-v2:0"
        assert kw["__bedrock_converse__"] is True
        assert kw["__bedrock_region__"] == "us-east-1"
        assert "messages" in kw

    def test_custom_region(self, transport):
        # An explicit region kwarg overrides the default.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="anthropic.claude-3-5-sonnet-20241022-v2:0",
            messages=msgs,
            region="eu-west-1",
        )
        assert kw["__bedrock_region__"] == "eu-west-1"

    def test_max_tokens(self, transport):
        # max_tokens lands under Converse's inferenceConfig.maxTokens.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="anthropic.claude-3-5-sonnet-20241022-v2:0",
            messages=msgs,
            max_tokens=8192,
        )
        assert kw["inferenceConfig"]["maxTokens"] == 8192
class TestBedrockConvertTools:
    """OpenAI-style tool schemas are converted into Bedrock toolSpec entries."""

    def test_convert_tools(self, transport):
        tools = [{
            "type": "function",
            "function": {
                "name": "terminal",
                "description": "Run commands",
                "parameters": {"type": "object", "properties": {"command": {"type": "string"}}},
            }
        }]
        result = transport.convert_tools(tools)
        assert len(result) == 1
        assert result[0]["toolSpec"]["name"] == "terminal"
class TestBedrockValidate:
    """validate_response() accepts raw Converse dicts and normalized objects."""

    def test_none(self, transport):
        assert transport.validate_response(None) is False

    def test_raw_dict_valid(self, transport):
        # A raw Converse response carries output.message.
        assert transport.validate_response({"output": {"message": {}}}) is True

    def test_raw_dict_invalid(self, transport):
        assert transport.validate_response({"error": "fail"}) is False

    def test_normalized_valid(self, transport):
        # OpenAI-shaped (choices[0].message) objects also validate.
        r = SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content="hi"))])
        assert transport.validate_response(r) is True
class TestBedrockMapFinishReason:
    """Bedrock stopReason values map onto OpenAI-style finish_reason strings."""

    def test_end_turn(self, transport):
        assert transport.map_finish_reason("end_turn") == "stop"

    def test_tool_use(self, transport):
        assert transport.map_finish_reason("tool_use") == "tool_calls"

    def test_max_tokens(self, transport):
        assert transport.map_finish_reason("max_tokens") == "length"

    def test_guardrail(self, transport):
        assert transport.map_finish_reason("guardrail_intervened") == "content_filter"

    def test_unknown(self, transport):
        # Unrecognized reasons default to "stop".
        assert transport.map_finish_reason("unknown") == "stop"
class TestBedrockNormalize:
    """normalize_response() turns Converse payloads into NormalizedResponse."""

    def _make_bedrock_response(self, text="Hello", tool_calls=None, stop_reason="end_turn"):
        """Build a raw Bedrock converse response dict."""
        content = []
        if text:
            content.append({"text": text})
        if tool_calls:
            for tc in tool_calls:
                content.append({
                    "toolUse": {
                        "toolUseId": tc["id"],
                        "name": tc["name"],
                        "input": tc["input"],
                    }
                })
        return {
            "output": {"message": {"role": "assistant", "content": content}},
            "stopReason": stop_reason,
            "usage": {"inputTokens": 10, "outputTokens": 5, "totalTokens": 15},
        }

    def test_text_response(self, transport):
        raw = self._make_bedrock_response(text="Hello world")
        nr = transport.normalize_response(raw)
        assert isinstance(nr, NormalizedResponse)
        assert nr.content == "Hello world"
        assert nr.finish_reason == "stop"

    def test_tool_call_response(self, transport):
        # toolUse content blocks become ToolCall entries with tool_calls finish.
        raw = self._make_bedrock_response(
            text=None,
            tool_calls=[{"id": "tool_1", "name": "terminal", "input": {"command": "ls"}}],
            stop_reason="tool_use",
        )
        nr = transport.normalize_response(raw)
        assert nr.finish_reason == "tool_calls"
        assert len(nr.tool_calls) == 1
        assert nr.tool_calls[0].name == "terminal"

    def test_already_normalized_response(self, transport):
        """Test normalize_response handles already-normalized SimpleNamespace (from dispatch site)."""
        pre_normalized = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(
                    content="Hello from Bedrock",
                    tool_calls=None,
                    reasoning=None,
                    reasoning_content=None,
                ),
                finish_reason="stop",
            )],
            usage=SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15),
        )
        nr = transport.normalize_response(pre_normalized)
        assert isinstance(nr, NormalizedResponse)
        assert nr.content == "Hello from Bedrock"
        assert nr.finish_reason == "stop"
        assert nr.usage is not None
        assert nr.usage.prompt_tokens == 10

View file

@ -0,0 +1,349 @@
"""Tests for the ChatCompletionsTransport."""
import pytest
from types import SimpleNamespace
from agent.transports import get_transport
from agent.transports.types import NormalizedResponse, ToolCall
@pytest.fixture
def transport():
    """Yield the chat-completions transport (importing the module registers it)."""
    import agent.transports.chat_completions  # noqa: F401  — import has the registration side effect
    cc_transport = get_transport("chat_completions")
    return cc_transport
class TestChatCompletionsBasic:
    """Registration, tool/message pass-through, and Codex field stripping."""

    def test_api_mode(self, transport):
        assert transport.api_mode == "chat_completions"

    def test_registered(self, transport):
        assert transport is not None

    def test_convert_tools_identity(self, transport):
        # Tools are already in the native chat-completions format — no copy.
        tools = [{"type": "function", "function": {"name": "test", "parameters": {}}}]
        assert transport.convert_tools(tools) is tools

    def test_convert_messages_no_codex_leaks(self, transport):
        msgs = [{"role": "user", "content": "hi"}]
        result = transport.convert_messages(msgs)
        assert result is msgs  # no copy needed

    def test_convert_messages_strips_codex_fields(self, transport):
        # Codex-only bookkeeping fields must not leak to other endpoints.
        msgs = [
            {"role": "assistant", "content": "ok", "codex_reasoning_items": [{"id": "rs_1"}],
             "tool_calls": [{"id": "call_1", "call_id": "call_1", "response_item_id": "fc_1",
                             "type": "function", "function": {"name": "t", "arguments": "{}"}}]},
        ]
        result = transport.convert_messages(msgs)
        assert "codex_reasoning_items" not in result[0]
        assert "call_id" not in result[0]["tool_calls"][0]
        assert "response_item_id" not in result[0]["tool_calls"][0]
        # Original list untouched (deepcopy-on-demand)
        assert "codex_reasoning_items" in msgs[0]
class TestChatCompletionsBuildKwargs:
    """build_kwargs() request assembly: roles, provider quirks, token limits."""

    def test_basic_kwargs(self, transport):
        msgs = [{"role": "user", "content": "Hello"}]
        kw = transport.build_kwargs(model="gpt-4o", messages=msgs, timeout=30.0)
        assert kw["model"] == "gpt-4o"
        assert kw["messages"][0]["content"] == "Hello"
        assert kw["timeout"] == 30.0

    def test_developer_role_swap(self, transport):
        # GPT-5-family models get "developer" in place of "system".
        msgs = [{"role": "system", "content": "You are helpful"}, {"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(model="gpt-5.4", messages=msgs, model_lower="gpt-5.4")
        assert kw["messages"][0]["role"] == "developer"

    def test_no_developer_swap_for_non_gpt5(self, transport):
        msgs = [{"role": "system", "content": "You are helpful"}, {"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(model="claude-sonnet-4", messages=msgs, model_lower="claude-sonnet-4")
        assert kw["messages"][0]["role"] == "system"

    def test_tools_included(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        tools = [{"type": "function", "function": {"name": "test", "parameters": {}}}]
        kw = transport.build_kwargs(model="gpt-4o", messages=msgs, tools=tools)
        assert kw["tools"] == tools

    def test_openrouter_provider_prefs(self, transport):
        # OpenRouter routing preferences travel in extra_body.provider.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gpt-4o", messages=msgs,
            is_openrouter=True,
            provider_preferences={"only": ["openai"]},
        )
        assert kw["extra_body"]["provider"] == {"only": ["openai"]}

    def test_nous_tags(self, transport):
        # Nous requests carry a product-attribution tag in extra_body.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(model="gpt-4o", messages=msgs, is_nous=True)
        assert kw["extra_body"]["tags"] == ["product=hermes-agent"]

    def test_reasoning_default(self, transport):
        # Reasoning-capable models default to enabled with medium effort.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gpt-4o", messages=msgs,
            supports_reasoning=True,
        )
        assert kw["extra_body"]["reasoning"] == {"enabled": True, "effort": "medium"}

    def test_nous_omits_disabled_reasoning(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gpt-4o", messages=msgs,
            supports_reasoning=True,
            is_nous=True,
            reasoning_config={"enabled": False},
        )
        # Nous rejects enabled=false; reasoning omitted entirely
        assert "reasoning" not in kw.get("extra_body", {})

    def test_ollama_num_ctx(self, transport):
        # Ollama's context window rides in extra_body.options.num_ctx.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="llama3", messages=msgs,
            ollama_num_ctx=32768,
        )
        assert kw["extra_body"]["options"]["num_ctx"] == 32768

    def test_custom_think_false(self, transport):
        # Custom providers with effort "none" get an explicit think=False.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="qwen3", messages=msgs,
            is_custom_provider=True,
            reasoning_config={"effort": "none"},
        )
        assert kw["extra_body"]["think"] is False

    def test_max_tokens_with_fn(self, transport):
        # max_tokens_param_fn chooses the parameter name for the limit.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gpt-4o", messages=msgs,
            max_tokens=4096,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert kw["max_tokens"] == 4096

    def test_ephemeral_overrides_max_tokens(self, transport):
        # A per-request ephemeral cap beats the configured max_tokens.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gpt-4o", messages=msgs,
            max_tokens=4096,
            ephemeral_max_output_tokens=2048,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert kw["max_tokens"] == 2048

    def test_nvidia_default_max_tokens(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="glm-4.7", messages=msgs,
            is_nvidia_nim=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        # NVIDIA default: 16384
        assert kw["max_tokens"] == 16384

    def test_qwen_default_max_tokens(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="qwen3-coder-plus", messages=msgs,
            is_qwen_portal=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        # Qwen default: 65536
        assert kw["max_tokens"] == 65536

    def test_anthropic_max_output_for_claude_on_aggregator(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="anthropic/claude-sonnet-4.6", messages=msgs,
            is_openrouter=True,
            anthropic_max_output=64000,
        )
        # Set as plain max_tokens (not via fn) because the aggregator proxies to
        # Anthropic Messages API which requires the field.
        assert kw["max_tokens"] == 64000

    def test_request_overrides_last(self, transport):
        # request_overrides merge last, so they win over computed values.
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(
            model="gpt-4o", messages=msgs,
            request_overrides={"service_tier": "priority"},
        )
        assert kw["service_tier"] == "priority"

    def test_fixed_temperature(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(model="gpt-4o", messages=msgs, fixed_temperature=0.6)
        assert kw["temperature"] == 0.6

    def test_omit_temperature(self, transport):
        msgs = [{"role": "user", "content": "Hi"}]
        kw = transport.build_kwargs(model="gpt-4o", messages=msgs, omit_temperature=True, fixed_temperature=0.5)
        # omit wins
        assert "temperature" not in kw
class TestChatCompletionsKimi:
    """Regression tests for the Kimi/Moonshot quirks migrated into the transport."""

    def test_kimi_max_tokens_default(self, transport):
        kw = transport.build_kwargs(
            model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        # Kimi CLI default: 32000
        assert kw["max_tokens"] == 32000

    def test_kimi_reasoning_effort_top_level(self, transport):
        kw = transport.build_kwargs(
            model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            reasoning_config={"effort": "high"},
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        # Kimi requires reasoning_effort as a top-level parameter
        assert kw["reasoning_effort"] == "high"

    def test_kimi_reasoning_effort_omitted_when_thinking_disabled(self, transport):
        kw = transport.build_kwargs(
            model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            reasoning_config={"enabled": False},
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        # Mirror Kimi CLI: omit reasoning_effort entirely when thinking off
        assert "reasoning_effort" not in kw

    def test_kimi_thinking_enabled_extra_body(self, transport):
        # No reasoning_config → thinking defaults to enabled via extra_body.
        kw = transport.build_kwargs(
            model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert kw["extra_body"]["thinking"] == {"type": "enabled"}

    def test_kimi_thinking_disabled_extra_body(self, transport):
        kw = transport.build_kwargs(
            model="kimi-k2", messages=[{"role": "user", "content": "Hi"}],
            is_kimi=True,
            reasoning_config={"enabled": False},
            max_tokens_param_fn=lambda n: {"max_tokens": n},
        )
        assert kw["extra_body"]["thinking"] == {"type": "disabled"}
class TestChatCompletionsValidate:
    """validate_response() requires a non-empty choices list."""

    def test_none(self, transport):
        assert transport.validate_response(None) is False

    def test_no_choices(self, transport):
        r = SimpleNamespace(choices=None)
        assert transport.validate_response(r) is False

    def test_empty_choices(self, transport):
        r = SimpleNamespace(choices=[])
        assert transport.validate_response(r) is False

    def test_valid(self, transport):
        r = SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content="hi"))])
        assert transport.validate_response(r) is True
class TestChatCompletionsNormalize:
    """normalize_response() maps SDK-shaped objects onto NormalizedResponse."""

    def test_text_response(self, transport):
        r = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(content="Hello", tool_calls=None, reasoning_content=None),
                finish_reason="stop",
            )],
            usage=SimpleNamespace(prompt_tokens=10, completion_tokens=5, total_tokens=15),
        )
        nr = transport.normalize_response(r)
        assert isinstance(nr, NormalizedResponse)
        assert nr.content == "Hello"
        assert nr.finish_reason == "stop"
        assert nr.tool_calls is None

    def test_tool_call_response(self, transport):
        # SDK tool_calls become ToolCall entries preserving id and name.
        tc = SimpleNamespace(
            id="call_123",
            function=SimpleNamespace(name="terminal", arguments='{"command": "ls"}'),
        )
        r = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(content=None, tool_calls=[tc], reasoning_content=None),
                finish_reason="tool_calls",
            )],
            usage=SimpleNamespace(prompt_tokens=10, completion_tokens=20, total_tokens=30),
        )
        nr = transport.normalize_response(r)
        assert len(nr.tool_calls) == 1
        assert nr.tool_calls[0].name == "terminal"
        assert nr.tool_calls[0].id == "call_123"

    def test_tool_call_extra_content_preserved(self, transport):
        """Gemini 3 thinking models attach extra_content with thought_signature
        on tool_calls. Without this replay on the next turn, the API rejects
        the request with 400. The transport MUST surface extra_content so the
        agent loop can write it back into the assistant message."""
        tc = SimpleNamespace(
            id="call_gem",
            function=SimpleNamespace(name="terminal", arguments='{"command": "ls"}'),
            extra_content={"google": {"thought_signature": "SIG_ABC123"}},
        )
        r = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(content=None, tool_calls=[tc], reasoning_content=None),
                finish_reason="tool_calls",
            )],
            usage=None,
        )
        nr = transport.normalize_response(r)
        assert nr.tool_calls[0].provider_data == {
            "extra_content": {"google": {"thought_signature": "SIG_ABC123"}}
        }

    def test_reasoning_content_preserved_separately(self, transport):
        """DeepSeek/Moonshot use reasoning_content distinct from reasoning.
        Don't merge them — the thinking-prefill retry check reads each field
        separately."""
        r = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(
                    content=None, tool_calls=None,
                    reasoning="summary text",
                    reasoning_content="detailed scratchpad",
                ),
                finish_reason="stop",
            )],
            usage=None,
        )
        nr = transport.normalize_response(r)
        # reasoning surfaces directly; reasoning_content rides provider_data.
        assert nr.reasoning == "summary text"
        assert nr.provider_data == {"reasoning_content": "detailed scratchpad"}
class TestChatCompletionsCacheStats:
    """extract_cache_stats reads prompt-cache counters out of ``usage``."""

    def test_no_usage(self, transport):
        resp = SimpleNamespace(usage=None)
        assert transport.extract_cache_stats(resp) is None

    def test_no_details(self, transport):
        usage = SimpleNamespace(prompt_tokens_details=None)
        assert transport.extract_cache_stats(SimpleNamespace(usage=usage)) is None

    def test_with_cache(self, transport):
        details = SimpleNamespace(cached_tokens=500, cache_write_tokens=100)
        usage = SimpleNamespace(prompt_tokens_details=details)
        stats = transport.extract_cache_stats(SimpleNamespace(usage=usage))
        # cache_write_tokens is reported under the "creation_tokens" key.
        assert stats == {"cached_tokens": 500, "creation_tokens": 100}

View file

@ -0,0 +1,220 @@
"""Tests for the ResponsesApiTransport (Codex)."""
import json
import pytest
from types import SimpleNamespace
from agent.transports import get_transport
from agent.transports.types import NormalizedResponse, ToolCall
@pytest.fixture
def transport():
    """Return the registered Codex Responses transport.

    Importing ``agent.transports.codex`` is needed purely for its side
    effect: the module registers the transport under ``"codex_responses"``
    on import (hence the ``noqa: F401`` on the otherwise-unused import).
    """
    import agent.transports.codex  # noqa: F401
    return get_transport("codex_responses")
class TestCodexTransportBasic:
    """Registration and tool-conversion basics for the Codex transport."""

    def test_api_mode(self, transport):
        assert transport.api_mode == "codex_responses"

    def test_registered_on_import(self, transport):
        assert transport is not None

    def test_convert_tools(self, transport):
        schema = {"type": "object", "properties": {"command": {"type": "string"}}}
        tools = [{
            "type": "function",
            "function": {
                "name": "terminal",
                "description": "Run a command",
                "parameters": schema,
            },
        }]
        converted = transport.convert_tools(tools)
        assert len(converted) == 1
        first = converted[0]
        # Responses-style tools are flattened: name sits at the top level.
        assert first["type"] == "function"
        assert first["name"] == "terminal"
class TestCodexBuildKwargs:
    """build_kwargs for the Responses API: instructions, reasoning, caching."""

    @staticmethod
    def _hi():
        # Single-turn user conversation shared by most of the tests below.
        return [{"role": "user", "content": "Hi"}]

    def test_basic_kwargs(self, transport):
        conversation = [
            {"role": "system", "content": "You are helpful."},
            {"role": "user", "content": "Hello"},
        ]
        kwargs = transport.build_kwargs(model="gpt-5.4", messages=conversation, tools=[])
        assert kwargs["model"] == "gpt-5.4"
        assert kwargs["instructions"] == "You are helpful."
        assert "input" in kwargs
        assert kwargs["store"] is False

    def test_system_extracted_from_messages(self, transport):
        conversation = [
            {"role": "system", "content": "Custom system prompt"},
            {"role": "user", "content": "Hi"},
        ]
        kwargs = transport.build_kwargs(model="gpt-5.4", messages=conversation, tools=[])
        assert kwargs["instructions"] == "Custom system prompt"

    def test_no_system_uses_default(self, transport):
        kwargs = transport.build_kwargs(model="gpt-5.4", messages=self._hi(), tools=[])
        # With no system message, a non-empty default instruction is supplied.
        assert kwargs["instructions"]

    def test_reasoning_config(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-5.4", messages=self._hi(), tools=[],
            reasoning_config={"effort": "high"},
        )
        assert kwargs.get("reasoning", {}).get("effort") == "high"

    def test_reasoning_disabled(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-5.4", messages=self._hi(), tools=[],
            reasoning_config={"enabled": False},
        )
        assert "reasoning" not in kwargs or kwargs.get("include") == []

    def test_session_id_sets_cache_key(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-5.4", messages=self._hi(), tools=[],
            session_id="test-session-123",
        )
        assert kwargs.get("prompt_cache_key") == "test-session-123"

    def test_github_responses_no_cache_key(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-5.4", messages=self._hi(), tools=[],
            session_id="test-session",
            is_github_responses=True,
        )
        assert "prompt_cache_key" not in kwargs

    def test_max_tokens(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-5.4", messages=self._hi(), tools=[],
            max_tokens=4096,
        )
        assert kwargs.get("max_output_tokens") == 4096

    def test_codex_backend_no_max_output_tokens(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-5.4", messages=self._hi(), tools=[],
            max_tokens=4096,
            is_codex_backend=True,
        )
        assert "max_output_tokens" not in kwargs

    def test_xai_headers(self, transport):
        kwargs = transport.build_kwargs(
            model="grok-3", messages=self._hi(), tools=[],
            session_id="conv-123",
            is_xai_responses=True,
        )
        assert kwargs.get("extra_headers", {}).get("x-grok-conv-id") == "conv-123"

    def test_minimal_effort_clamped(self, transport):
        kwargs = transport.build_kwargs(
            model="gpt-5.4", messages=self._hi(), tools=[],
            reasoning_config={"effort": "minimal"},
        )
        # "minimal" should be clamped to "low"
        assert kwargs.get("reasoning", {}).get("effort") == "low"
class TestCodexValidateResponse:
    """Strict validation of Responses-API payloads."""

    def test_none_response(self, transport):
        assert transport.validate_response(None) is False

    def test_empty_output(self, transport):
        resp = SimpleNamespace(output=[], output_text=None)
        assert transport.validate_response(resp) is False

    def test_valid_output(self, transport):
        resp = SimpleNamespace(output=[{"type": "message", "content": []}])
        assert transport.validate_response(resp) is True

    def test_output_text_fallback_not_valid(self, transport):
        """validate_response is strict — output_text alone doesn't make a
        response valid. The caller handles the output_text fallback itself,
        with diagnostic logging."""
        resp = SimpleNamespace(output=None, output_text="Some text")
        assert transport.validate_response(resp) is False
class TestCodexMapFinishReason:
    """Responses statuses collapse onto chat-style finish reasons."""

    def test_completed(self, transport):
        reason = transport.map_finish_reason("completed")
        assert reason == "stop"

    def test_incomplete(self, transport):
        # "incomplete" (truncated output) maps to the length stop reason.
        reason = transport.map_finish_reason("incomplete")
        assert reason == "length"

    def test_failed(self, transport):
        reason = transport.map_finish_reason("failed")
        assert reason == "stop"

    def test_unknown(self, transport):
        # Unrecognized statuses fall back to "stop".
        reason = transport.map_finish_reason("unknown_status")
        assert reason == "stop"
class TestCodexNormalizeResponse:
def test_text_response(self, transport):
    """A plain text Codex message normalizes to content plus "stop"."""
    text_part = SimpleNamespace(type="output_text", text="Hello world")
    message_item = SimpleNamespace(
        type="message",
        role="assistant",
        content=[text_part],
        status="completed",
    )
    usage = SimpleNamespace(
        input_tokens=10,
        output_tokens=5,
        input_tokens_details=None,
        output_tokens_details=None,
    )
    resp = SimpleNamespace(
        output=[message_item],
        status="completed",
        incomplete_details=None,
        usage=usage,
    )
    normalized = transport.normalize_response(resp)
    assert isinstance(normalized, NormalizedResponse)
    assert normalized.content == "Hello world"
    assert normalized.finish_reason == "stop"
def test_tool_call_response(self, transport):
    """Normalize a Codex response with tool calls."""
    # A Responses-API function_call output item. NOTE(review): both call_id
    # and id are present here — presumably call_id is the identifier echoed
    # back with the tool result; confirm against the transport.
    r = SimpleNamespace(
        output=[
            SimpleNamespace(
                type="function_call",
                call_id="call_abc123",
                name="terminal",
                arguments=json.dumps({"command": "ls"}),
                id="fc_abc123",
                status="completed",
            ),
        ],
        status="completed",
        incomplete_details=None,
        usage=SimpleNamespace(input_tokens=10, output_tokens=20,
                              input_tokens_details=None, output_tokens_details=None),
    )
    nr = transport.normalize_response(r)
    # A function_call item must yield finish_reason "tool_calls" and one call.
    assert nr.finish_reason == "tool_calls"
    assert len(nr.tool_calls) == 1
    tc = nr.tool_calls[0]
    assert tc.name == "terminal"
    # Arguments remain a JSON string rather than a parsed dict.
    assert '"command"' in tc.arguments